{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3LCP4KS3COZVR5SCQFVS4U5E5R","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d0805f6edd2541a4a5b621e1d70498fd07a43471a7b3635870b979300524e772","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T01:44:38Z","title_canon_sha256":"732b326777bd1e9bffee539a290bfb6b919c8c16d110634e4d9e6ce46b579abc"},"schema_version":"1.0","source":{"id":"2606.06819","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.06819","created_at":"2026-06-08T01:04:29Z"},{"alias_kind":"arxiv_version","alias_value":"2606.06819v1","created_at":"2026-06-08T01:04:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06819","created_at":"2026-06-08T01:04:29Z"},{"alias_kind":"pith_short_12","alias_value":"3LCP4KS3COZV","created_at":"2026-06-08T01:04:29Z"},{"alias_kind":"pith_short_16","alias_value":"3LCP4KS3COZVR5SC","created_at":"2026-06-08T01:04:29Z"},{"alias_kind":"pith_short_8","alias_value":"3LCP4KS3","created_at":"2026-06-08T01:04:29Z"}],"graph_snapshots":[{"event_id":"sha256:a0b08aba6975dc0dcea183d1805df794f9212e9fda8d45df55329c227917ca45","target":"graph","created_at":"2026-06-08T01:04:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.06819/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reasoning Video Object Segmentation (RVOS) demands a sophisticated integration of temporal dynamics, spatial details, and linguistic reasoning to achieve precise pixel-level localization. Existing methods are limited to reasoning over fixed initial inputs and lack the capacity to actively acquire further visual evidence, which is often essential for resolving complex references in long or intricate videos. To address this, we propose \\textbf{VideoSEG-O3}, the first multi-turn reinforcement learning framework for RVOS that emulates the human \\textit{``coarse-to-fine''} cognitive process. It emp","authors_text":"Boqiang Duan, Boyuan Tong, Jiedong Zhuang, Jingdong Wang, Ming Dai, Sen Yang, Wankou Yang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T01:44:38Z","title":"VideoSEG-O3: A Multi-turn Reinforcement Learning Framework for Reasoning Video Object Segmentation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06819","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fc2b92144c4492042e20aa05479e4d7da65eb95cbb74a3bdb684a307ec080ebc","target":"record","created_at":"2026-06-08T01:04:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d0805f6edd2541a4a5b621e1d70498fd07a43471a7b3635870b979300524e772","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-05T01:44:38Z","title_canon_sha256":"732b326777bd1e9bffee539a290bfb6b919c8c16d110634e4d9e6ce46b579abc"},"schema_version":"1.0","source":{"id":"2606.06819","kind":"arxiv","version":1}},"canonical_sha256":"dac4fe2a5b13b358f642816b2e53a4ec7e75f122a298a087555a53bd42fff984","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dac4fe2a5b13b358f642816b2e53a4ec7e75f122a298a087555a53bd42fff984","first_computed_at":"2026-06-08T01:04:29.814680Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:04:29.814680Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"O8ef7y2kzgP2IBIMeld6/QPcdANtbv4P/IOPeyZMd43oD/ZZvZ8ycTGvRfXJdFxbrP6qftV5AN47igw8YJ5yBg==","signature_status":"signed_v1","signed_at":"2026-06-08T01:04:29.815513Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.06819","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fc2b92144c4492042e20aa05479e4d7da65eb95cbb74a3bdb684a307ec080ebc","sha256:a0b08aba6975dc0dcea183d1805df794f9212e9fda8d45df55329c227917ca45"],"state_sha256":"53ecd80c101270848afcb26b6734fd5c2f38943d64f5bcf20423c8f7407df88d"}