{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:3PPKYF6QJ5LWY6FAJHIMVD77S2","short_pith_number":"pith:3PPKYF6Q","canonical_record":{"source":{"id":"1901.09107","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-25T22:23:39Z","cross_cats_sorted":[],"title_canon_sha256":"d5df97e57ad1c3609be7a59d42dc82facd5457786658f7f4fa647c68c2579fb0","abstract_canon_sha256":"a1cbc72966e1deccd7376348718825b78abe4d121c94c2e01d6e83bbe6afaa2c"},"schema_version":"1.0"},"canonical_sha256":"dbdeac17d04f576c78a049d0ca8fff9686474cb8011ec53d9f4aff208450540d","source":{"kind":"arxiv","id":"1901.09107","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09107","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09107v2","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09107","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"pith_short_12","alias_value":"3PPKYF6QJ5LW","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"3PPKYF6QJ5LWY6FA","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"3PPKYF6Q","created_at":"2026-05-18T12:33:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:3PPKYF6QJ5LWY6FAJHIMVD77S2","target":"record","payload":{"canonical_record":{"source":{"id":"1901.09107","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-25T22:23:39Z","cross_cats_sorted":[],"title_canon_sha256":"d5df97e57ad1c3609be7a59d42dc82facd5457786658f7f4fa647c68c2579fb0","abstract_canon_sha256":"a1cbc72966e1deccd7376348718825b78abe4d121c94c2e01d6e83bbe6afaa2c"},"schema_version":"1.0"},"canonical_sha256":"dbdeac17d04f576c78a049d0ca8fff9686474cb8011ec53d9f4aff208450540d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:41.151324Z","signature_b64":"HS9CmNavjfFfpEheK3K6uozRhQmYyYs9vTqaDMBiqtxT1svBL5eRrSVeejOl+QZmotLS1RcDebzG879dKjMECw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dbdeac17d04f576c78a049d0ca8fff9686474cb8011ec53d9f4aff208450540d","last_reissued_at":"2026-05-17T23:46:41.150830Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:41.150830Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.09107","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qpgQYMLSlLTwdiVhgsh8ykMGvyf98gEt9cnrs7P7F2U+hYs6V1YLSQBktkPt03BE4VboX6kmkGRZ8wIhOar5Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T23:50:24.421683Z"},"content_sha256":"bb76c65c70edc0ecad2d0da0ff84df28861bc84231a4082c39132b4fef65dd0e","schema_version":"1.0","event_id":"sha256:bb76c65c70edc0ecad2d0da0ff84df28861bc84231a4082c39132b4fef65dd0e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:3PPKYF6QJ5LWY6FAJHIMVD77S2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Audio-Visual Scene-Aware Dialog","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Abhishek Das, Anoop Cherian, Chiori Hori, Devi Parikh, Dhruv Batra, Huda Alamri, Irfan Essa, Jue Wang, Peter Anderson, Stefan Lee, Tim K. Marks, Vincent Cartillier","submitted_at":"2019-01-25T22:23:39Z","abstract_excerpt":"We introduce the task of scene-aware dialog. Our goal is to generate a complete and natural response to a question about a scene, given video and audio of the scene and the history of previous turns in the dialog. To answer successfully, agents must ground concepts from the question in the video while leveraging contextual cues from the dialog history. To benchmark this task, we introduce the Audio Visual Scene-Aware Dialog (AVSD) Dataset. For each of more than 11,000 videos of human actions from the Charades dataset, our dataset contains a dialog about the video, plus a final summary of the v"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09107","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:46:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5pEbGM37CDUbNv4w3fVlc4K6mFunMr5pgXH3I7S9NiIjv00hH3PIse0mfc8/9P6XnlzQY3PnWSGY2VafKOHNCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T23:50:24.422326Z"},"content_sha256":"457605a9e28023dfc6d2b89ad5d0b5943d1f4761ac14233b909a25806be810bc","schema_version":"1.0","event_id":"sha256:457605a9e28023dfc6d2b89ad5d0b5943d1f4761ac14233b909a25806be810bc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/bundle.json","state_url":"https://pith.science/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T23:50:24Z","links":{"resolver":"https://pith.science/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2","bundle":"https://pith.science/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/bundle.json","state":"https://pith.science/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3PPKYF6QJ5LWY6FAJHIMVD77S2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:3PPKYF6QJ5LWY6FAJHIMVD77S2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a1cbc72966e1deccd7376348718825b78abe4d121c94c2e01d6e83bbe6afaa2c","cross_cats_sorted":[],"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-25T22:23:39Z","title_canon_sha256":"d5df97e57ad1c3609be7a59d42dc82facd5457786658f7f4fa647c68c2579fb0"},"schema_version":"1.0","source":{"id":"1901.09107","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09107","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09107v2","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09107","created_at":"2026-05-17T23:46:41Z"},{"alias_kind":"pith_short_12","alias_value":"3PPKYF6QJ5LW","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_16","alias_value":"3PPKYF6QJ5LWY6FA","created_at":"2026-05-18T12:33:07Z"},{"alias_kind":"pith_short_8","alias_value":"3PPKYF6Q","created_at":"2026-05-18T12:33:07Z"}],"graph_snapshots":[{"event_id":"sha256:457605a9e28023dfc6d2b89ad5d0b5943d1f4761ac14233b909a25806be810bc","target":"graph","created_at":"2026-05-17T23:46:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce the task of scene-aware dialog. Our goal is to generate a complete and natural response to a question about a scene, given video and audio of the scene and the history of previous turns in the dialog. To answer successfully, agents must ground concepts from the question in the video while leveraging contextual cues from the dialog history. To benchmark this task, we introduce the Audio Visual Scene-Aware Dialog (AVSD) Dataset. For each of more than 11,000 videos of human actions from the Charades dataset, our dataset contains a dialog about the video, plus a final summary of the v","authors_text":"Abhishek Das, Anoop Cherian, Chiori Hori, Devi Parikh, Dhruv Batra, Huda Alamri, Irfan Essa, Jue Wang, Peter Anderson, Stefan Lee, Tim K. Marks, Vincent Cartillier","cross_cats":[],"headline":"","license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-25T22:23:39Z","title":"Audio-Visual Scene-Aware Dialog"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09107","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bb76c65c70edc0ecad2d0da0ff84df28861bc84231a4082c39132b4fef65dd0e","target":"record","created_at":"2026-05-17T23:46:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a1cbc72966e1deccd7376348718825b78abe4d121c94c2e01d6e83bbe6afaa2c","cross_cats_sorted":[],"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-25T22:23:39Z","title_canon_sha256":"d5df97e57ad1c3609be7a59d42dc82facd5457786658f7f4fa647c68c2579fb0"},"schema_version":"1.0","source":{"id":"1901.09107","kind":"arxiv","version":2}},"canonical_sha256":"dbdeac17d04f576c78a049d0ca8fff9686474cb8011ec53d9f4aff208450540d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dbdeac17d04f576c78a049d0ca8fff9686474cb8011ec53d9f4aff208450540d","first_computed_at":"2026-05-17T23:46:41.150830Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:46:41.150830Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HS9CmNavjfFfpEheK3K6uozRhQmYyYs9vTqaDMBiqtxT1svBL5eRrSVeejOl+QZmotLS1RcDebzG879dKjMECw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:46:41.151324Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.09107","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bb76c65c70edc0ecad2d0da0ff84df28861bc84231a4082c39132b4fef65dd0e","sha256:457605a9e28023dfc6d2b89ad5d0b5943d1f4761ac14233b909a25806be810bc"],"state_sha256":"32d84ced75cc65d86f55855db8526da08cd04425ce8c19af75dfe659095b613c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hs7HtI359Ah8l+miHmm+FdJsoRd/y00rlai3mqceeLX+fLHej1PxeNj+iqQrDNEm3xh98LiFecsnpNQnK5jBAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T23:50:24.424482Z","bundle_sha256":"91125096e4485b245f7661aefff0422f7ae6ac49751de17d5e9b4d4273804716"}}