{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NQSQNP2MDRLWVPKYGG6H7HDMDW","short_pith_number":"pith:NQSQNP2M","schema_version":"1.0","canonical_sha256":"6c2506bf4c1c576abd5831bc7f9c6c1dad04fee6f938e9e4f4fd1066ec3f61e4","source":{"kind":"arxiv","id":"2607.02096","version":1},"attestation_state":"computed","paper":{"title":"LongEgoRefer: A Benchmark for Long-Form Egocentric Video Referring Expression Comprehension","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chenhui Chu, Mahiro Ukai, Nakamasa Inoue, Shuhei Kurita, Shunya Kato, Taiki Miyanishi","submitted_at":"2026-07-02T12:32:53Z","abstract_excerpt":"Egocentric videos capture rich and diverse human-object interactions and have emerged as a fundamental resource for understanding human activities related to objects. In this context, Video Referring Expression Comprehension (Video REC), the task of localizing the temporal and spatial extent of a referred object in video frames given a natural language query, plays a key role in linking textual descriptions to observed objects in untrimmed egocentric recordings. However, existing egocentric Video REC benchmarks primarily focus on short video clips, where some target object appears densely with"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.02096","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-07-02T12:32:53Z","cross_cats_sorted":[],"title_canon_sha256":"b78779fb08edc4ef50375492ed92aae09201caf59af732475f346c6298227305","abstract_canon_sha256":"c63b63d988abf441d04c1911361e5478863764b4d02b9999b42699ebc23c7d7b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:17:42.164895Z","signature_b64":"PZFRXvpNhabSVQleWf8v7JmeOzVU9//Q4414UPK5b7sofPAz0tiTUwzKx9P53CLQ8iBvtyLJduooDr4hQ3HPBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6c2506bf4c1c576abd5831bc7f9c6c1dad04fee6f938e9e4f4fd1066ec3f61e4","last_reissued_at":"2026-07-03T01:17:42.164459Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:17:42.164459Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"LongEgoRefer: A Benchmark for Long-Form Egocentric Video Referring Expression Comprehension","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chenhui Chu, Mahiro Ukai, Nakamasa Inoue, Shuhei Kurita, Shunya Kato, Taiki Miyanishi","submitted_at":"2026-07-02T12:32:53Z","abstract_excerpt":"Egocentric videos capture rich and diverse human-object interactions and have emerged as a fundamental resource for understanding human activities related to objects. In this context, Video Referring Expression Comprehension (Video REC), the task of localizing the temporal and spatial extent of a referred object in video frames given a natural language query, plays a key role in linking textual descriptions to observed objects in untrimmed egocentric recordings. However, existing egocentric Video REC benchmarks primarily focus on short video clips, where some target object appears densely with"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.02096","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.02096/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.02096","created_at":"2026-07-03T01:17:42.164519+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.02096v1","created_at":"2026-07-03T01:17:42.164519+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.02096","created_at":"2026-07-03T01:17:42.164519+00:00"},{"alias_kind":"pith_short_12","alias_value":"NQSQNP2MDRLW","created_at":"2026-07-03T01:17:42.164519+00:00"},{"alias_kind":"pith_short_16","alias_value":"NQSQNP2MDRLWVPKY","created_at":"2026-07-03T01:17:42.164519+00:00"},{"alias_kind":"pith_short_8","alias_value":"NQSQNP2M","created_at":"2026-07-03T01:17:42.164519+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW","json":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW.json","graph_json":"https://pith.science/api/pith-number/NQSQNP2MDRLWVPKYGG6H7HDMDW/graph.json","events_json":"https://pith.science/api/pith-number/NQSQNP2MDRLWVPKYGG6H7HDMDW/events.json","paper":"https://pith.science/paper/NQSQNP2M"},"agent_actions":{"view_html":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW","download_json":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW.json","view_paper":"https://pith.science/paper/NQSQNP2M","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.02096&json=true","fetch_graph":"https://pith.science/api/pith-number/NQSQNP2MDRLWVPKYGG6H7HDMDW/graph.json","fetch_events":"https://pith.science/api/pith-number/NQSQNP2MDRLWVPKYGG6H7HDMDW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW/action/storage_attestation","attest_author":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW/action/author_attestation","sign_citation":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW/action/citation_signature","submit_replication":"https://pith.science/pith/NQSQNP2MDRLWVPKYGG6H7HDMDW/action/replication_record"}},"created_at":"2026-07-03T01:17:42.164519+00:00","updated_at":"2026-07-03T01:17:42.164519+00:00"}