{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:M7OWQRN6UIB4AISB4UEROEPQKR","short_pith_number":"pith:M7OWQRN6","schema_version":"1.0","canonical_sha256":"67dd6845bea203c02241e5091711f05442ec8dbd593bc5b3ddc3b7ff6f092930","source":{"kind":"arxiv","id":"2606.26994","version":1},"attestation_state":"computed","paper":{"title":"Event-Aware Instructed Assistant for Referring Video Segmentation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Henghui Ding, Jinyu Liu, Shuting He, Yu-Gang Jiang","submitted_at":"2026-06-25T13:12:43Z","abstract_excerpt":"Existing referring video segmentation methods often treat a video as a single event consisting of multiple images, overlooking the fact that a video typically contains multiple distinct events. Under such a mechanism, the model needs to directly understand all the complex content in the video and text, which can easily lead to confusion and hallucinations. To address this issue, we propose to decompose a video to a set of simple events by learnable Event Query, and understand complex video content in an event-by-event, easy-to-understand manner. This is based on the observation that natural la"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.26994","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-25T13:12:43Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7b642049d302eb273d45eeef0d15c6c26ae84850fe71d6e16ac6b200b8b89100","abstract_canon_sha256":"5ef4728c5628ee7317aad20f02179f1b9174a4011d76f5374c5e4fa454c7ad98"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:06.182986Z","signature_b64":"S6Ga6ICLY8SpZpMlVzBTpCewBbdHJOFttX9DT8PH2aS/LtPvokRv1HNzcrftsoDI5pkZANMJR8OP65pdW+u/Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"67dd6845bea203c02241e5091711f05442ec8dbd593bc5b3ddc3b7ff6f092930","last_reissued_at":"2026-06-26T01:16:06.182583Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:06.182583Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Event-Aware Instructed Assistant for Referring Video Segmentation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Henghui Ding, Jinyu Liu, Shuting He, Yu-Gang Jiang","submitted_at":"2026-06-25T13:12:43Z","abstract_excerpt":"Existing referring video segmentation methods often treat a video as a single event consisting of multiple images, overlooking the fact that a video typically contains multiple distinct events. Under such a mechanism, the model needs to directly understand all the complex content in the video and text, which can easily lead to confusion and hallucinations. To address this issue, we propose to decompose a video to a set of simple events by learnable Event Query, and understand complex video content in an event-by-event, easy-to-understand manner. This is based on the observation that natural la"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26994","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26994/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.26994","created_at":"2026-06-26T01:16:06.182642+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.26994v1","created_at":"2026-06-26T01:16:06.182642+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26994","created_at":"2026-06-26T01:16:06.182642+00:00"},{"alias_kind":"pith_short_12","alias_value":"M7OWQRN6UIB4","created_at":"2026-06-26T01:16:06.182642+00:00"},{"alias_kind":"pith_short_16","alias_value":"M7OWQRN6UIB4AISB","created_at":"2026-06-26T01:16:06.182642+00:00"},{"alias_kind":"pith_short_8","alias_value":"M7OWQRN6","created_at":"2026-06-26T01:16:06.182642+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR","json":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR.json","graph_json":"https://pith.science/api/pith-number/M7OWQRN6UIB4AISB4UEROEPQKR/graph.json","events_json":"https://pith.science/api/pith-number/M7OWQRN6UIB4AISB4UEROEPQKR/events.json","paper":"https://pith.science/paper/M7OWQRN6"},"agent_actions":{"view_html":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR","download_json":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR.json","view_paper":"https://pith.science/paper/M7OWQRN6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.26994&json=true","fetch_graph":"https://pith.science/api/pith-number/M7OWQRN6UIB4AISB4UEROEPQKR/graph.json","fetch_events":"https://pith.science/api/pith-number/M7OWQRN6UIB4AISB4UEROEPQKR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR/action/storage_attestation","attest_author":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR/action/author_attestation","sign_citation":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR/action/citation_signature","submit_replication":"https://pith.science/pith/M7OWQRN6UIB4AISB4UEROEPQKR/action/replication_record"}},"created_at":"2026-06-26T01:16:06.182642+00:00","updated_at":"2026-06-26T01:16:06.182642+00:00"}