{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:T46CC2VTVDHSLVONBACY7GDHJM","short_pith_number":"pith:T46CC2VT","schema_version":"1.0","canonical_sha256":"9f3c216ab3a8cf25d5cd08058f98674b365ad6697dfecb073aba41758fabd3dc","source":{"kind":"arxiv","id":"2606.21949","version":1},"attestation_state":"computed","paper":{"title":"CapRiCorn-1K: A Comprehensive Benchmark for Video Captioning and Subject Referential Consistency Across Temporal Scales","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Bohan Zeng, Bozhou Li, Jiafu Tang, Liang Wang, Pengfei Wan, Qiang Liu, Shihao Li, Tieniu Tan, Weihong Lin, Xinlong Chen, Yang Shi, Yiyan Ji, Yizhuo Jia, Yuanxing Zhang, Yue Ding","submitted_at":"2026-06-20T08:37:32Z","abstract_excerpt":"Accurate and comprehensive video captions with consistent subject references are critical for downstream understanding and generation tasks. However, few existing benchmarks can objectively and comprehensively evaluate these properties across diverse durations and scenarios, thereby hindering the advancement of video captioning models. To bridge this gap, we propose CapRiCorn-1K, a comprehensive benchmark designed to evaluate both video captioning quality and subject referential consistency across long temporal horizons and diverse video domains. To accommodate varied evaluation needs, our ben"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.21949","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-20T08:37:32Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b0b6d51d34c99f075cd459922b71d8fd2e5dc91779f7d0ea6399cb55a27f12ee","abstract_canon_sha256":"2e959e06814b94121098fe312c863d5c38e153d8effcb2445757dfd3590cee3d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:04.006609Z","signature_b64":"mVhN8WfPBZtzcUmi5djQTKjhf58F5XuE+S9ZCsypKWArZi7sVhd11d3eW/ovElnuwzQQ8q29IWSQZo2Rmi6oCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9f3c216ab3a8cf25d5cd08058f98674b365ad6697dfecb073aba41758fabd3dc","last_reissued_at":"2026-06-23T02:13:04.006198Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:04.006198Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CapRiCorn-1K: A Comprehensive Benchmark for Video Captioning and Subject Referential Consistency Across Temporal Scales","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Bohan Zeng, Bozhou Li, Jiafu Tang, Liang Wang, Pengfei Wan, Qiang Liu, Shihao Li, Tieniu Tan, Weihong Lin, Xinlong Chen, Yang Shi, Yiyan Ji, Yizhuo Jia, Yuanxing Zhang, Yue Ding","submitted_at":"2026-06-20T08:37:32Z","abstract_excerpt":"Accurate and comprehensive video captions with consistent subject references are critical for downstream understanding and generation tasks. However, few existing benchmarks can objectively and comprehensively evaluate these properties across diverse durations and scenarios, thereby hindering the advancement of video captioning models. To bridge this gap, we propose CapRiCorn-1K, a comprehensive benchmark designed to evaluate both video captioning quality and subject referential consistency across long temporal horizons and diverse video domains. To accommodate varied evaluation needs, our ben"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21949","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21949/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.21949","created_at":"2026-06-23T02:13:04.006259+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.21949v1","created_at":"2026-06-23T02:13:04.006259+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21949","created_at":"2026-06-23T02:13:04.006259+00:00"},{"alias_kind":"pith_short_12","alias_value":"T46CC2VTVDHS","created_at":"2026-06-23T02:13:04.006259+00:00"},{"alias_kind":"pith_short_16","alias_value":"T46CC2VTVDHSLVON","created_at":"2026-06-23T02:13:04.006259+00:00"},{"alias_kind":"pith_short_8","alias_value":"T46CC2VT","created_at":"2026-06-23T02:13:04.006259+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM","json":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM.json","graph_json":"https://pith.science/api/pith-number/T46CC2VTVDHSLVONBACY7GDHJM/graph.json","events_json":"https://pith.science/api/pith-number/T46CC2VTVDHSLVONBACY7GDHJM/events.json","paper":"https://pith.science/paper/T46CC2VT"},"agent_actions":{"view_html":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM","download_json":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM.json","view_paper":"https://pith.science/paper/T46CC2VT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.21949&json=true","fetch_graph":"https://pith.science/api/pith-number/T46CC2VTVDHSLVONBACY7GDHJM/graph.json","fetch_events":"https://pith.science/api/pith-number/T46CC2VTVDHSLVONBACY7GDHJM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM/action/storage_attestation","attest_author":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM/action/author_attestation","sign_citation":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM/action/citation_signature","submit_replication":"https://pith.science/pith/T46CC2VTVDHSLVONBACY7GDHJM/action/replication_record"}},"created_at":"2026-06-23T02:13:04.006259+00:00","updated_at":"2026-06-23T02:13:04.006259+00:00"}