{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:P4XXDSU32O7NMVI2IYOHC2XM3G","short_pith_number":"pith:P4XXDSU3","canonical_record":{"source":{"id":"1707.06029","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-19T11:44:36Z","cross_cats_sorted":[],"title_canon_sha256":"fd6834e9f70bcfff0c7c2795160ea56eef9c314241a583f2b57bc497c28821d6","abstract_canon_sha256":"5b526e8667b3eaff943b06a04c0dd53ccb376a56d953d112feb4e7708d94c01e"},"schema_version":"1.0"},"canonical_sha256":"7f2f71ca9bd3bed6551a461c716aecd9b565c0f2a717d4bec1ce05970eaae0e3","source":{"kind":"arxiv","id":"1707.06029","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.06029","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"arxiv_version","alias_value":"1707.06029v1","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.06029","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"pith_short_12","alias_value":"P4XXDSU32O7N","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"P4XXDSU32O7NMVI2","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"P4XXDSU3","created_at":"2026-05-18T12:31:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:P4XXDSU32O7NMVI2IYOHC2XM3G","target":"record","payload":{"canonical_record":{"source":{"id":"1707.06029","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-19T11:44:36Z","cross_cats_sorted":[],"title_canon_sha256":"fd6834e9f70bcfff0c7c2795160ea56eef9c314241a583f2b57bc497c28821d6","abstract_canon_sha256":"5b526e8667b3eaff943b06a04c0dd53ccb376a56d953d112feb4e7708d94c01e"},"schema_version":"1.0"},"canonical_sha256":"7f2f71ca9bd3bed6551a461c716aecd9b565c0f2a717d4bec1ce05970eaae0e3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:57.761238Z","signature_b64":"zUMzhqMUxfmv9JnP8Io8CV8tgzPrki0nssDKGrfLsaftHbAQBcO39+hVJI6kLoJ+bswtFEv1mj5mU+I19EYyAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7f2f71ca9bd3bed6551a461c716aecd9b565c0f2a717d4bec1ce05970eaae0e3","last_reissued_at":"2026-05-18T00:39:57.760717Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:57.760717Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.06029","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:39:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nSKqGdwppJuc8dkg5f+oVUvT9+45o7qOSURlX19bAxdMoJQnbqWvdJWR5FBOdtxxsiafmHafijOW13+5H5TJBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T13:57:52.246233Z"},"content_sha256":"8221d612d582ca075f372309a907ead159c70b6db4cca7a47e458d1788d705ac","schema_version":"1.0","event_id":"sha256:8221d612d582ca075f372309a907ead159c70b6db4cca7a47e458d1788d705ac"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:P4XXDSU32O7NMVI2IYOHC2XM3G","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Supervising Neural Attention Models for Video Captioning by Human Gaze Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Gunhee Kim, Jongwook Choi, Kyung Yoo, Sang-Hun Lee, Yeonhwa Kim, Youngjae Yu","submitted_at":"2017-07-19T11:44:36Z","abstract_excerpt":"The attention mechanisms in deep neural networks are inspired by human's attention that sequentially focuses on the most relevant parts of the information over time to generate prediction output. The attention parameters in those models are implicitly trained in an end-to-end manner, yet there have been few trials to explicitly incorporate human gaze tracking to supervise the attention models. In this paper, we investigate whether attention models can benefit from explicit human gaze labels, especially for the task of video captioning. We collect a new dataset called VAS, consisting of movie c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.06029","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:39:57Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8T3Czk/iqchPQA8W6DpOwpmP72F/V1eCYf7hdxNeGvVWufrHfoU5NsS+/JLGx5NWsnnYBckJczIZH/4isqg7AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T13:57:52.246601Z"},"content_sha256":"433c3b0dd0a753dd35ba5ba3532501d014051f2a6a99e2d86ab678ac8c4cb50a","schema_version":"1.0","event_id":"sha256:433c3b0dd0a753dd35ba5ba3532501d014051f2a6a99e2d86ab678ac8c4cb50a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/bundle.json","state_url":"https://pith.science/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T13:57:52Z","links":{"resolver":"https://pith.science/pith/P4XXDSU32O7NMVI2IYOHC2XM3G","bundle":"https://pith.science/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/bundle.json","state":"https://pith.science/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/state.json","well_known_bundle":"https://pith.science/.well-known/pith/P4XXDSU32O7NMVI2IYOHC2XM3G/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:P4XXDSU32O7NMVI2IYOHC2XM3G","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5b526e8667b3eaff943b06a04c0dd53ccb376a56d953d112feb4e7708d94c01e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-19T11:44:36Z","title_canon_sha256":"fd6834e9f70bcfff0c7c2795160ea56eef9c314241a583f2b57bc497c28821d6"},"schema_version":"1.0","source":{"id":"1707.06029","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.06029","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"arxiv_version","alias_value":"1707.06029v1","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.06029","created_at":"2026-05-18T00:39:57Z"},{"alias_kind":"pith_short_12","alias_value":"P4XXDSU32O7N","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"P4XXDSU32O7NMVI2","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"P4XXDSU3","created_at":"2026-05-18T12:31:37Z"}],"graph_snapshots":[{"event_id":"sha256:433c3b0dd0a753dd35ba5ba3532501d014051f2a6a99e2d86ab678ac8c4cb50a","target":"graph","created_at":"2026-05-18T00:39:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The attention mechanisms in deep neural networks are inspired by human's attention that sequentially focuses on the most relevant parts of the information over time to generate prediction output. The attention parameters in those models are implicitly trained in an end-to-end manner, yet there have been few trials to explicitly incorporate human gaze tracking to supervise the attention models. In this paper, we investigate whether attention models can benefit from explicit human gaze labels, especially for the task of video captioning. We collect a new dataset called VAS, consisting of movie c","authors_text":"Gunhee Kim, Jongwook Choi, Kyung Yoo, Sang-Hun Lee, Yeonhwa Kim, Youngjae Yu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-19T11:44:36Z","title":"Supervising Neural Attention Models for Video Captioning by Human Gaze Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.06029","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8221d612d582ca075f372309a907ead159c70b6db4cca7a47e458d1788d705ac","target":"record","created_at":"2026-05-18T00:39:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5b526e8667b3eaff943b06a04c0dd53ccb376a56d953d112feb4e7708d94c01e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-07-19T11:44:36Z","title_canon_sha256":"fd6834e9f70bcfff0c7c2795160ea56eef9c314241a583f2b57bc497c28821d6"},"schema_version":"1.0","source":{"id":"1707.06029","kind":"arxiv","version":1}},"canonical_sha256":"7f2f71ca9bd3bed6551a461c716aecd9b565c0f2a717d4bec1ce05970eaae0e3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7f2f71ca9bd3bed6551a461c716aecd9b565c0f2a717d4bec1ce05970eaae0e3","first_computed_at":"2026-05-18T00:39:57.760717Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:39:57.760717Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zUMzhqMUxfmv9JnP8Io8CV8tgzPrki0nssDKGrfLsaftHbAQBcO39+hVJI6kLoJ+bswtFEv1mj5mU+I19EYyAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:39:57.761238Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.06029","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8221d612d582ca075f372309a907ead159c70b6db4cca7a47e458d1788d705ac","sha256:433c3b0dd0a753dd35ba5ba3532501d014051f2a6a99e2d86ab678ac8c4cb50a"],"state_sha256":"d9d543a9dd254213b802e75875d243debc95e57d6865143b8d5f395c00c7c170"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cWlznL0MmqCgwt2Fol6qxDsibZ0XbaEikiZWs3OzaB5dKLU3tA2Zo8j494bi7/ChhIVI/9T/b3vdnqyDu81UAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T13:57:52.248605Z","bundle_sha256":"f755659dd418906653fd807a7ef2133da4d4e902b35f2ea8a6d28b30bae9d549"}}