{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KG2TP5XHITD7FSB6NEI32PNPBP","short_pith_number":"pith:KG2TP5XH","schema_version":"1.0","canonical_sha256":"51b537f6e744c7f2c83e6911bd3daf0bd2989709be8c4a37ac744095a5b2903f","source":{"kind":"arxiv","id":"2605.14733","version":1},"attestation_state":"computed","paper":{"title":"Video-Zero: Self-Evolution Video Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Deyi Ji, Lanyun Zhu, Ruihang Chu, Ruixu Zhang, Xuanyi Liu, Yujiu Yang, Yuxin Meng","submitted_at":"2026-05-14T11:56:14Z","abstract_excerpt":"Self-evolution offers a promising path for improving reasoning models without relying on intensive human annotation. However, extending this paradigm to video understanding remains underexplored and challenging: videos are long, dynamic, and redundant, while the evidence needed for reasoning is often sparse and temporally localized. Naively generating difficult question-answer pairs from full videos can therefore produce supervision that appears challenging but is weakly grounded, relying on static cues or language priors rather than temporal evidence. In this work, we argue that the key bottl"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.14733","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T11:56:14Z","cross_cats_sorted":[],"title_canon_sha256":"5347b4ae58db86a8b98f87edf020c0ad2c6138b3a48cdcd4bda4e9ab2675ce26","abstract_canon_sha256":"286c6666d75116f46f2bf6af3de895bc47fb239c3d33afd94b382eb0bd1b96bd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:59.005533Z","signature_b64":"qsuIZH4V5gOCy0vPf6Z/6WMj9q25HvBkewf0qtGBIKXY9JL5NVpfS2r9CrE9giGnFZ1luGL82umuRECBzW5IBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"51b537f6e744c7f2c83e6911bd3daf0bd2989709be8c4a37ac744095a5b2903f","last_reissued_at":"2026-05-17T23:38:59.004841Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:59.004841Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Video-Zero: Self-Evolution Video Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Deyi Ji, Lanyun Zhu, Ruihang Chu, Ruixu Zhang, Xuanyi Liu, Yujiu Yang, Yuxin Meng","submitted_at":"2026-05-14T11:56:14Z","abstract_excerpt":"Self-evolution offers a promising path for improving reasoning models without relying on intensive human annotation. However, extending this paradigm to video understanding remains underexplored and challenging: videos are long, dynamic, and redundant, while the evidence needed for reasoning is often sparse and temporally localized. Naively generating difficult question-answer pairs from full videos can therefore produce supervision that appears challenging but is weakly grounded, relying on static cues or language priors rather than temporal evidence. In this work, we argue that the key bottl"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.14733","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.14733","created_at":"2026-05-17T23:38:59.004949+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.14733v1","created_at":"2026-05-17T23:38:59.004949+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14733","created_at":"2026-05-17T23:38:59.004949+00:00"},{"alias_kind":"pith_short_12","alias_value":"KG2TP5XHITD7","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"KG2TP5XHITD7FSB6","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"KG2TP5XH","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP","json":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP.json","graph_json":"https://pith.science/api/pith-number/KG2TP5XHITD7FSB6NEI32PNPBP/graph.json","events_json":"https://pith.science/api/pith-number/KG2TP5XHITD7FSB6NEI32PNPBP/events.json","paper":"https://pith.science/paper/KG2TP5XH"},"agent_actions":{"view_html":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP","download_json":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP.json","view_paper":"https://pith.science/paper/KG2TP5XH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.14733&json=true","fetch_graph":"https://pith.science/api/pith-number/KG2TP5XHITD7FSB6NEI32PNPBP/graph.json","fetch_events":"https://pith.science/api/pith-number/KG2TP5XHITD7FSB6NEI32PNPBP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP/action/storage_attestation","attest_author":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP/action/author_attestation","sign_citation":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP/action/citation_signature","submit_replication":"https://pith.science/pith/KG2TP5XHITD7FSB6NEI32PNPBP/action/replication_record"}},"created_at":"2026-05-17T23:38:59.004949+00:00","updated_at":"2026-05-17T23:38:59.004949+00:00"}