{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:GZAB3TTTUXDYX7BR2ZY5ARBO4B","short_pith_number":"pith:GZAB3TTT","schema_version":"1.0","canonical_sha256":"36401dce73a5c78bfc31d671d0442ee07e837434650745cf9b96517e4ecc84ae","source":{"kind":"arxiv","id":"1801.01582","version":2},"attestation_state":"computed","paper":{"title":"Object Referring in Videos with Language and Human Gaze","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Arun Balajee Vasudevan, Dengxin Dai, Luc Van Gool","submitted_at":"2018-01-04T23:31:20Z","abstract_excerpt":"We investigate the problem of object referring (OR) i.e. to localize a target object in a visual scene coming with a language description. Humans perceive the world more as continued video snippets than as static images, and describe objects not only by their appearance, but also by their spatio-temporal context and motion features. Humans also gaze at the object when they issue a referring expression. Existing works for OR mostly focus on static images only, which fall short in providing many such cues. This paper addresses OR in videos with language and human gaze. To that end, we present a "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1801.01582","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-01-04T23:31:20Z","cross_cats_sorted":[],"title_canon_sha256":"9832d3ac07b0707e11221306c45e3c66326b801dd6c0955408a6baa6a5b2a3e2","abstract_canon_sha256":"b85de239f00879bb9579db4085dac87f00ad4fbfb0a494d918ed8118012b08f4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:19:16.545151Z","signature_b64":"JIYxJLli/aQw/k1ngmW1CdMDYmWLOFKO3Dnna4chBSBPHTgnOrnbnY0fDuGcPj7avjp7aMauDUpfrRaMR/XVBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"36401dce73a5c78bfc31d671d0442ee07e837434650745cf9b96517e4ecc84ae","last_reissued_at":"2026-05-18T00:19:16.544521Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:19:16.544521Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Object Referring in Videos with Language and Human Gaze","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Arun Balajee Vasudevan, Dengxin Dai, Luc Van Gool","submitted_at":"2018-01-04T23:31:20Z","abstract_excerpt":"We investigate the problem of object referring (OR) i.e. to localize a target object in a visual scene coming with a language description. Humans perceive the world more as continued video snippets than as static images, and describe objects not only by their appearance, but also by their spatio-temporal context and motion features. Humans also gaze at the object when they issue a referring expression. Existing works for OR mostly focus on static images only, which fall short in providing many such cues. This paper addresses OR in videos with language and human gaze. To that end, we present a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.01582","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1801.01582","created_at":"2026-05-18T00:19:16.544605+00:00"},{"alias_kind":"arxiv_version","alias_value":"1801.01582v2","created_at":"2026-05-18T00:19:16.544605+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.01582","created_at":"2026-05-18T00:19:16.544605+00:00"},{"alias_kind":"pith_short_12","alias_value":"GZAB3TTTUXDY","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_16","alias_value":"GZAB3TTTUXDYX7BR","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_8","alias_value":"GZAB3TTT","created_at":"2026-05-18T12:32:25.280505+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B","json":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B.json","graph_json":"https://pith.science/api/pith-number/GZAB3TTTUXDYX7BR2ZY5ARBO4B/graph.json","events_json":"https://pith.science/api/pith-number/GZAB3TTTUXDYX7BR2ZY5ARBO4B/events.json","paper":"https://pith.science/paper/GZAB3TTT"},"agent_actions":{"view_html":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B","download_json":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B.json","view_paper":"https://pith.science/paper/GZAB3TTT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1801.01582&json=true","fetch_graph":"https://pith.science/api/pith-number/GZAB3TTTUXDYX7BR2ZY5ARBO4B/graph.json","fetch_events":"https://pith.science/api/pith-number/GZAB3TTTUXDYX7BR2ZY5ARBO4B/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B/action/storage_attestation","attest_author":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B/action/author_attestation","sign_citation":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B/action/citation_signature","submit_replication":"https://pith.science/pith/GZAB3TTTUXDYX7BR2ZY5ARBO4B/action/replication_record"}},"created_at":"2026-05-18T00:19:16.544605+00:00","updated_at":"2026-05-18T00:19:16.544605+00:00"}