{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:3WUA2SMOA5IDJOY4KXU55G44FG","short_pith_number":"pith:3WUA2SMO","canonical_record":{"source":{"id":"2606.24464","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T11:57:08Z","cross_cats_sorted":[],"title_canon_sha256":"68bc96f008b96a41e1e34b28d5f4808f43a634f8c4077f6716bff5a7852a2c0c","abstract_canon_sha256":"c9111315e0ed4b1352f496378b93ff1d15b190b12c222b54f8a38d2f73034224"},"schema_version":"1.0"},"canonical_sha256":"dda80d498e075034bb1c55e9de9b9c29bf29f1e4ea51095f45f0a9c1c4e5e501","source":{"kind":"arxiv","id":"2606.24464","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24464","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24464v1","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24464","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_12","alias_value":"3WUA2SMOA5ID","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_16","alias_value":"3WUA2SMOA5IDJOY4","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_8","alias_value":"3WUA2SMO","created_at":"2026-06-24T01:15:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:3WUA2SMOA5IDJOY4KXU55G44FG","target":"record","payload":{"canonical_record":{"source":{"id":"2606.24464","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T11:57:08Z","cross_cats_sorted":[],"title_canon_sha256":"68bc96f008b96a41e1e34b28d5f4808f43a634f8c4077f6716bff5a7852a2c0c","abstract_canon_sha256":"c9111315e0ed4b1352f496378b93ff1d15b190b12c222b54f8a38d2f73034224"},"schema_version":"1.0"},"canonical_sha256":"dda80d498e075034bb1c55e9de9b9c29bf29f1e4ea51095f45f0a9c1c4e5e501","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:31.095931Z","signature_b64":"ydabPmnXFXETd/mItvWBv2VPo1zJ2x8Iu1kgSoqctC3vGzRJDVqe/M9UVnIIanHT3YsCFwbgbtO6nnB20FTJCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dda80d498e075034bb1c55e9de9b9c29bf29f1e4ea51095f45f0a9c1c4e5e501","last_reissued_at":"2026-06-24T01:15:31.095568Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:31.095568Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.24464","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4L1mDW82zqIuludMR6A9sRMUnbejPeF5yiIyi0hKj+PxPTIKXK+LhIMny3OOF9VQvTrd8E3gbda4mTfqS8klDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T13:47:59.569112Z"},"content_sha256":"238781e5769db56d97697ae7ace8fceda573ed5bd719ec7a340b2edd1c0a999e","schema_version":"1.0","event_id":"sha256:238781e5769db56d97697ae7ace8fceda573ed5bd719ec7a340b2edd1c0a999e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:3WUA2SMOA5IDJOY4KXU55G44FG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Boosting Text-Driven Video Segmentation via Geometry-Aware Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hesong Li, Tianyu Zhu, Ying Fu, Yingping Liang","submitted_at":"2026-06-23T11:57:08Z","abstract_excerpt":"Text-driven Referring Video Object Segmentation (RVOS) aims to locate and segment target objects in videos given natural language. However, existing models are typically trained on 2D image or video datasets with naive segmentation losses, which overlooks the geometric consistency across frames and leads to weak spatial understanding. In this paper, we propose Geometry-enhanced Language-guided Video segmentation (GeoLaV), a two-stage framework that distills 3D geometric knowledge from images to enhance text-driven video segmentation. In the first stage, we perform monocular geometry pretrainin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24464","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24464/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bD4FP+HaamX34Uhabl2HKqg7AYmr3Bib9oUOEzFiUxajdLNjiSOAzElhVLx0TGjEAk1CL/uSoGWXA4VLcD40AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T13:47:59.569516Z"},"content_sha256":"f863bfc31db9d5478ba52975205995f3df36021a141ff6dfd7bf725e0d44ee8e","schema_version":"1.0","event_id":"sha256:f863bfc31db9d5478ba52975205995f3df36021a141ff6dfd7bf725e0d44ee8e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3WUA2SMOA5IDJOY4KXU55G44FG/bundle.json","state_url":"https://pith.science/pith/3WUA2SMOA5IDJOY4KXU55G44FG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3WUA2SMOA5IDJOY4KXU55G44FG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-24T13:47:59Z","links":{"resolver":"https://pith.science/pith/3WUA2SMOA5IDJOY4KXU55G44FG","bundle":"https://pith.science/pith/3WUA2SMOA5IDJOY4KXU55G44FG/bundle.json","state":"https://pith.science/pith/3WUA2SMOA5IDJOY4KXU55G44FG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3WUA2SMOA5IDJOY4KXU55G44FG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3WUA2SMOA5IDJOY4KXU55G44FG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c9111315e0ed4b1352f496378b93ff1d15b190b12c222b54f8a38d2f73034224","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T11:57:08Z","title_canon_sha256":"68bc96f008b96a41e1e34b28d5f4808f43a634f8c4077f6716bff5a7852a2c0c"},"schema_version":"1.0","source":{"id":"2606.24464","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24464","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24464v1","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24464","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_12","alias_value":"3WUA2SMOA5ID","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_16","alias_value":"3WUA2SMOA5IDJOY4","created_at":"2026-06-24T01:15:31Z"},{"alias_kind":"pith_short_8","alias_value":"3WUA2SMO","created_at":"2026-06-24T01:15:31Z"}],"graph_snapshots":[{"event_id":"sha256:f863bfc31db9d5478ba52975205995f3df36021a141ff6dfd7bf725e0d44ee8e","target":"graph","created_at":"2026-06-24T01:15:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24464/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Text-driven Referring Video Object Segmentation (RVOS) aims to locate and segment target objects in videos given natural language. However, existing models are typically trained on 2D image or video datasets with naive segmentation losses, which overlooks the geometric consistency across frames and leads to weak spatial understanding. In this paper, we propose Geometry-enhanced Language-guided Video segmentation (GeoLaV), a two-stage framework that distills 3D geometric knowledge from images to enhance text-driven video segmentation. In the first stage, we perform monocular geometry pretrainin","authors_text":"Hesong Li, Tianyu Zhu, Ying Fu, Yingping Liang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T11:57:08Z","title":"Boosting Text-Driven Video Segmentation via Geometry-Aware Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24464","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:238781e5769db56d97697ae7ace8fceda573ed5bd719ec7a340b2edd1c0a999e","target":"record","created_at":"2026-06-24T01:15:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c9111315e0ed4b1352f496378b93ff1d15b190b12c222b54f8a38d2f73034224","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-23T11:57:08Z","title_canon_sha256":"68bc96f008b96a41e1e34b28d5f4808f43a634f8c4077f6716bff5a7852a2c0c"},"schema_version":"1.0","source":{"id":"2606.24464","kind":"arxiv","version":1}},"canonical_sha256":"dda80d498e075034bb1c55e9de9b9c29bf29f1e4ea51095f45f0a9c1c4e5e501","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dda80d498e075034bb1c55e9de9b9c29bf29f1e4ea51095f45f0a9c1c4e5e501","first_computed_at":"2026-06-24T01:15:31.095568Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:31.095568Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ydabPmnXFXETd/mItvWBv2VPo1zJ2x8Iu1kgSoqctC3vGzRJDVqe/M9UVnIIanHT3YsCFwbgbtO6nnB20FTJCg==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:31.095931Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24464","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:238781e5769db56d97697ae7ace8fceda573ed5bd719ec7a340b2edd1c0a999e","sha256:f863bfc31db9d5478ba52975205995f3df36021a141ff6dfd7bf725e0d44ee8e"],"state_sha256":"6875762e0f775a9bc9f782fe7215260231944023be76d23c238d64a5c1f9b479"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lzijm0yZkGhy4e8A9iyPCdeVeh+76otIkJhu+JSY7C65bSJXqqgW5KYyhD30R/14/JYhY3PfTqZmwU+G34qRAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-24T13:47:59.571541Z","bundle_sha256":"0a140194117413eb25536857018eb194549ae69613856668be4937164f20f3bc"}}