{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:TTMYLTMS7PP3B5S6QFQMWD3BGH","short_pith_number":"pith:TTMYLTMS","canonical_record":{"source":{"id":"1505.05914","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-05-21T21:47:08Z","cross_cats_sorted":[],"title_canon_sha256":"826da0abb969b77dcc41a445a15d16078b346738079248a1d186d9f8b3866122","abstract_canon_sha256":"d77c6dd1a1b12fa92364abf9105db015cb0a447c0298ad45908d90e40dd89984"},"schema_version":"1.0"},"canonical_sha256":"9cd985cd92fbdfb0f65e8160cb0f6131d8b552011ae084b08dd1001d9caa5b99","source":{"kind":"arxiv","id":"1505.05914","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05914","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05914v3","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05914","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"pith_short_12","alias_value":"TTMYLTMS7PP3","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_16","alias_value":"TTMYLTMS7PP3B5S6","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_8","alias_value":"TTMYLTMS","created_at":"2026-05-18T12:29:42Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:TTMYLTMS7PP3B5S6QFQMWD3BGH","target":"record","payload":{"canonical_record":{"source":{"id":"1505.05914","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-05-21T21:47:08Z","cross_cats_sorted":[],"title_canon_sha256":"826da0abb969b77dcc41a445a15d16078b346738079248a1d186d9f8b3866122","abstract_canon_sha256":"d77c6dd1a1b12fa92364abf9105db015cb0a447c0298ad45908d90e40dd89984"},"schema_version":"1.0"},"canonical_sha256":"9cd985cd92fbdfb0f65e8160cb0f6131d8b552011ae084b08dd1001d9caa5b99","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:18:52.681148Z","signature_b64":"Gs7TMd052zvNjimcN5+PdtGUjKKxIARdHJ+N/M4W+BpQbeU2UTTUq6tiQaRrfPmUSxCb8I12NkHuagm43hDCCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9cd985cd92fbdfb0f65e8160cb0f6131d8b552011ae084b08dd1001d9caa5b99","last_reissued_at":"2026-05-18T01:18:52.680511Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:18:52.680511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1505.05914","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:18:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hwQbA5tKhvTLwVOulJuz+4XS7RPURPL3uZ8HzMsJroqmJoURvEWXhqpCVtf6EuGzynhlIq9crJL12nMfgG6nCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T21:07:45.687448Z"},"content_sha256":"083ca5ea13816bac2926514f83adc6c24b3194ec0670ac45a93586e3fe41cdfc","schema_version":"1.0","event_id":"sha256:083ca5ea13816bac2926514f83adc6c24b3194ec0670ac45a93586e3fe41cdfc"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:TTMYLTMS7PP3B5S6QFQMWD3BGH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Multi-scale Multiple Instance Video Description Network","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Huijuan Xu, Kate Saenko, Marcus Rohrbach, Subhashini Venugopalan, Vasili Ramanishka","submitted_at":"2015-05-21T21:47:08Z","abstract_excerpt":"Generating natural language descriptions for in-the-wild videos is a challenging task. Most state-of-the-art methods for solving this problem borrow existing deep convolutional neural network (CNN) architectures (AlexNet, GoogLeNet) to extract a visual representation of the input video. However, these deep CNN architectures are designed for single-label centered-positioned object classification. While they generate strong semantic features, they have no inherent structure allowing them to detect multiple objects of different sizes and locations in the frame. Our paper tries to solve this probl"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05914","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:18:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KMXj/BmiAjyzMf2HGPes3wpy83trc68Qc2xJ48uxhaE4G0gbBetXqpN79z/n/vxG1789OT1NylPo1Jw8QI8ZBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T21:07:45.687797Z"},"content_sha256":"d3a4b1b60a659f74d6966fa19c12e71d85582dde6230226718c673153f3caa12","schema_version":"1.0","event_id":"sha256:d3a4b1b60a659f74d6966fa19c12e71d85582dde6230226718c673153f3caa12"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/bundle.json","state_url":"https://pith.science/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-19T21:07:45Z","links":{"resolver":"https://pith.science/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH","bundle":"https://pith.science/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/bundle.json","state":"https://pith.science/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/TTMYLTMS7PP3B5S6QFQMWD3BGH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:TTMYLTMS7PP3B5S6QFQMWD3BGH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d77c6dd1a1b12fa92364abf9105db015cb0a447c0298ad45908d90e40dd89984","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-05-21T21:47:08Z","title_canon_sha256":"826da0abb969b77dcc41a445a15d16078b346738079248a1d186d9f8b3866122"},"schema_version":"1.0","source":{"id":"1505.05914","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05914","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05914v3","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05914","created_at":"2026-05-18T01:18:52Z"},{"alias_kind":"pith_short_12","alias_value":"TTMYLTMS7PP3","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_16","alias_value":"TTMYLTMS7PP3B5S6","created_at":"2026-05-18T12:29:42Z"},{"alias_kind":"pith_short_8","alias_value":"TTMYLTMS","created_at":"2026-05-18T12:29:42Z"}],"graph_snapshots":[{"event_id":"sha256:d3a4b1b60a659f74d6966fa19c12e71d85582dde6230226718c673153f3caa12","target":"graph","created_at":"2026-05-18T01:18:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Generating natural language descriptions for in-the-wild videos is a challenging task. Most state-of-the-art methods for solving this problem borrow existing deep convolutional neural network (CNN) architectures (AlexNet, GoogLeNet) to extract a visual representation of the input video. However, these deep CNN architectures are designed for single-label centered-positioned object classification. While they generate strong semantic features, they have no inherent structure allowing them to detect multiple objects of different sizes and locations in the frame. Our paper tries to solve this probl","authors_text":"Huijuan Xu, Kate Saenko, Marcus Rohrbach, Subhashini Venugopalan, Vasili Ramanishka","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-05-21T21:47:08Z","title":"A Multi-scale Multiple Instance Video Description Network"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05914","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:083ca5ea13816bac2926514f83adc6c24b3194ec0670ac45a93586e3fe41cdfc","target":"record","created_at":"2026-05-18T01:18:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d77c6dd1a1b12fa92364abf9105db015cb0a447c0298ad45908d90e40dd89984","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-05-21T21:47:08Z","title_canon_sha256":"826da0abb969b77dcc41a445a15d16078b346738079248a1d186d9f8b3866122"},"schema_version":"1.0","source":{"id":"1505.05914","kind":"arxiv","version":3}},"canonical_sha256":"9cd985cd92fbdfb0f65e8160cb0f6131d8b552011ae084b08dd1001d9caa5b99","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9cd985cd92fbdfb0f65e8160cb0f6131d8b552011ae084b08dd1001d9caa5b99","first_computed_at":"2026-05-18T01:18:52.680511Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:18:52.680511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Gs7TMd052zvNjimcN5+PdtGUjKKxIARdHJ+N/M4W+BpQbeU2UTTUq6tiQaRrfPmUSxCb8I12NkHuagm43hDCCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:18:52.681148Z","signed_message":"canonical_sha256_bytes"},"source_id":"1505.05914","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:083ca5ea13816bac2926514f83adc6c24b3194ec0670ac45a93586e3fe41cdfc","sha256:d3a4b1b60a659f74d6966fa19c12e71d85582dde6230226718c673153f3caa12"],"state_sha256":"c178caae527c2e4785a42f08225db5a9f73c128725adbf329d1b9ea8ee097233"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mOVaqr/YZwVim1fxr1nCKbUMl0FIulVoGji0C8rwXIPKUh1Jy7x0uyoA88iK+aIJKCqRQerhUHyLdT118vSDAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-19T21:07:45.689753Z","bundle_sha256":"482d97207050ea43efcaabdeace590f80ae0d586dee5bf583f0d28b332080649"}}