{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:6YEKD6F3NXIPBFTKSOXD5JEVXK","short_pith_number":"pith:6YEKD6F3","canonical_record":{"source":{"id":"1608.04959","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-17T13:30:06Z","cross_cats_sorted":[],"title_canon_sha256":"90c066d4a525147a4277681b348e1e7e4273b6fcf68fc095f9552bcc2fe0843c","abstract_canon_sha256":"ca93f214cbd1427bffe3a0b6a1a1acc3cc4b661573653bc0c6629082df47479e"},"schema_version":"1.0"},"canonical_sha256":"f608a1f8bb6dd0f0966a93ae3ea495baaf71fbd4397c5be2078a7c1393c91b45","source":{"kind":"arxiv","id":"1608.04959","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1608.04959","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"arxiv_version","alias_value":"1608.04959v1","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.04959","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"pith_short_12","alias_value":"6YEKD6F3NXIP","created_at":"2026-05-18T12:30:04Z"},{"alias_kind":"pith_short_16","alias_value":"6YEKD6F3NXIPBFTK","created_at":"2026-05-18T12:30:04Z"},{"alias_kind":"pith_short_8","alias_value":"6YEKD6F3","created_at":"2026-05-18T12:30:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:6YEKD6F3NXIPBFTKSOXD5JEVXK","target":"record","payload":{"canonical_record":{"source":{"id":"1608.04959","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-17T13:30:06Z","cross_cats_sorted":[],"title_canon_sha256":"90c066d4a525147a4277681b348e1e7e4273b6fcf68fc095f9552bcc2fe0843c","abstract_canon_sha256":"ca93f214cbd1427bffe3a0b6a1a1acc3cc4b661573653bc0c6629082df47479e"},"schema_version":"1.0"},"canonical_sha256":"f608a1f8bb6dd0f0966a93ae3ea495baaf71fbd4397c5be2078a7c1393c91b45","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:08:33.138753Z","signature_b64":"NOnlNfKT24945CnaEXIMvXQybzRHRjzMSV9L6XBei+BT3lsiNl8C89wbib3SoW8DzoVTb3FJ5wpz+BVYqhlEBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f608a1f8bb6dd0f0966a93ae3ea495baaf71fbd4397c5be2078a7c1393c91b45","last_reissued_at":"2026-05-18T01:08:33.138163Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:08:33.138163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1608.04959","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:08:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GkqXcXGwQB5g3dy7XNEsykzVuJWUp71N5gat5a7ErwVjo4b3gt9TCeJIbyZO4pdTH7FY1/1uIM4savRljKi0Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T00:35:37.048390Z"},"content_sha256":"c30923a4d1af69597b54de4e6789240f713daca3ae81f9a0f21c0188b39f1178","schema_version":"1.0","event_id":"sha256:c30923a4d1af69597b54de4e6789240f713daca3ae81f9a0f21c0188b39f1178"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:6YEKD6F3NXIPBFTKSOXD5JEVXK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Frame- and Segment-Level Features and Candidate Pool Evaluation for Video Caption Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jorma Laaksonen, Rakshith Shetty","submitted_at":"2016-08-17T13:30:06Z","abstract_excerpt":"We present our submission to the Microsoft Video to Language Challenge of generating short captions describing videos in the challenge dataset. Our model is based on the encoder--decoder pipeline, popular in image and video captioning systems. We propose to utilize two different kinds of video features, one to capture the video content in terms of objects and attributes, and the other to capture the motion and action information. Using these diverse features we train models specializing in two separate input sub-domains. We then train an evaluator model which is used to pick the best caption f"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.04959","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:08:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ekmytgfZ4tIwhdLj1JjszO1La57XxEAqr1YuJ8nAGVT4BagABnAt/HVoYAr5DM0P0PeMnTXEqAvp7YGFuZPSDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T00:35:37.049026Z"},"content_sha256":"9f09d319d61a0627386f9794fbf8f5b9077ac0cb78eb77a8ac88c2b2473386b1","schema_version":"1.0","event_id":"sha256:9f09d319d61a0627386f9794fbf8f5b9077ac0cb78eb77a8ac88c2b2473386b1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/bundle.json","state_url":"https://pith.science/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T00:35:37Z","links":{"resolver":"https://pith.science/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK","bundle":"https://pith.science/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/bundle.json","state":"https://pith.science/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6YEKD6F3NXIPBFTKSOXD5JEVXK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:6YEKD6F3NXIPBFTKSOXD5JEVXK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ca93f214cbd1427bffe3a0b6a1a1acc3cc4b661573653bc0c6629082df47479e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-17T13:30:06Z","title_canon_sha256":"90c066d4a525147a4277681b348e1e7e4273b6fcf68fc095f9552bcc2fe0843c"},"schema_version":"1.0","source":{"id":"1608.04959","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1608.04959","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"arxiv_version","alias_value":"1608.04959v1","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.04959","created_at":"2026-05-18T01:08:33Z"},{"alias_kind":"pith_short_12","alias_value":"6YEKD6F3NXIP","created_at":"2026-05-18T12:30:04Z"},{"alias_kind":"pith_short_16","alias_value":"6YEKD6F3NXIPBFTK","created_at":"2026-05-18T12:30:04Z"},{"alias_kind":"pith_short_8","alias_value":"6YEKD6F3","created_at":"2026-05-18T12:30:04Z"}],"graph_snapshots":[{"event_id":"sha256:9f09d319d61a0627386f9794fbf8f5b9077ac0cb78eb77a8ac88c2b2473386b1","target":"graph","created_at":"2026-05-18T01:08:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present our submission to the Microsoft Video to Language Challenge of generating short captions describing videos in the challenge dataset. Our model is based on the encoder--decoder pipeline, popular in image and video captioning systems. We propose to utilize two different kinds of video features, one to capture the video content in terms of objects and attributes, and the other to capture the motion and action information. Using these diverse features we train models specializing in two separate input sub-domains. We then train an evaluator model which is used to pick the best caption f","authors_text":"Jorma Laaksonen, Rakshith Shetty","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-17T13:30:06Z","title":"Frame- and Segment-Level Features and Candidate Pool Evaluation for Video Caption Generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.04959","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c30923a4d1af69597b54de4e6789240f713daca3ae81f9a0f21c0188b39f1178","target":"record","created_at":"2026-05-18T01:08:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ca93f214cbd1427bffe3a0b6a1a1acc3cc4b661573653bc0c6629082df47479e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-17T13:30:06Z","title_canon_sha256":"90c066d4a525147a4277681b348e1e7e4273b6fcf68fc095f9552bcc2fe0843c"},"schema_version":"1.0","source":{"id":"1608.04959","kind":"arxiv","version":1}},"canonical_sha256":"f608a1f8bb6dd0f0966a93ae3ea495baaf71fbd4397c5be2078a7c1393c91b45","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f608a1f8bb6dd0f0966a93ae3ea495baaf71fbd4397c5be2078a7c1393c91b45","first_computed_at":"2026-05-18T01:08:33.138163Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:08:33.138163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NOnlNfKT24945CnaEXIMvXQybzRHRjzMSV9L6XBei+BT3lsiNl8C89wbib3SoW8DzoVTb3FJ5wpz+BVYqhlEBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:08:33.138753Z","signed_message":"canonical_sha256_bytes"},"source_id":"1608.04959","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c30923a4d1af69597b54de4e6789240f713daca3ae81f9a0f21c0188b39f1178","sha256:9f09d319d61a0627386f9794fbf8f5b9077ac0cb78eb77a8ac88c2b2473386b1"],"state_sha256":"5af0a2326761dde1f981810aa0fcef044b94beb086d9c1de765b8b8ba0575e2f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NYHy5xb3YpRYUf9rq0VfuHCINCT+5N3di2O9Qj+Bp0BvJ83gFck6cvRH+DkuIdJprTyVwjsu2tOh+4xA7McnBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T00:35:37.051766Z","bundle_sha256":"09e1b7ab53a174537d8b7f971eb478b85caf76d1a7f4af9092d111f2a92d211f"}}