{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:BVHKYF3DTR7ZLD7LKNRVZBQ62B","short_pith_number":"pith:BVHKYF3D","canonical_record":{"source":{"id":"2602.04094","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-04T00:08:35Z","cross_cats_sorted":[],"title_canon_sha256":"704c9b4d263ee671a26dbb004028d77b1a000f12c4b90f9a90aad9159ed6c7e8","abstract_canon_sha256":"5225f08639ec653413ae5105dd520b78b01ce4316e8c4949119f45f4a7f4164c"},"schema_version":"1.0"},"canonical_sha256":"0d4eac17639c7f958feb53635c861ed06f006223bf90db558bdbddd92c1c1d80","source":{"kind":"arxiv","id":"2602.04094","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.04094","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"arxiv_version","alias_value":"2602.04094v2","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.04094","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_12","alias_value":"BVHKYF3DTR7Z","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_16","alias_value":"BVHKYF3DTR7ZLD7L","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_8","alias_value":"BVHKYF3D","created_at":"2026-06-02T01:04:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:BVHKYF3DTR7ZLD7LKNRVZBQ62B","target":"record","payload":{"canonical_record":{"source":{"id":"2602.04094","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-04T00:08:35Z","cross_cats_sorted":[],"title_canon_sha256":"704c9b4d263ee671a26dbb004028d77b1a000f12c4b90f9a90aad9159ed6c7e8","abstract_canon_sha256":"5225f08639ec653413ae5105dd520b78b01ce4316e8c4949119f45f4a7f4164c"},"schema_version":"1.0"},"canonical_sha256":"0d4eac17639c7f958feb53635c861ed06f006223bf90db558bdbddd92c1c1d80","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:04:15.605664Z","signature_b64":"OtrSLOPManhHhds9qU3z85ScEPy7Zs+KfLIm4FSKsSOwSJqpcEORGldNR3z2fmVtWTb1jevaW3u7ivkEhNTJAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0d4eac17639c7f958feb53635c861ed06f006223bf90db558bdbddd92c1c1d80","last_reissued_at":"2026-06-02T01:04:15.605117Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:04:15.605117Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.04094","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T01:04:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h6lgsnqkfa+KsMewsshKGVO7KBscV4Ck8ehDUUiIA/RisnL9lcuXWPD3sZlSOwTxVzh6dQCFAXqawoo6Ldi8Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T20:14:25.158680Z"},"content_sha256":"a5322da7c28c37f7969c3bd1e8740862778615b85978e8a828ec578763ae028b","schema_version":"1.0","event_id":"sha256:a5322da7c28c37f7969c3bd1e8740862778615b85978e8a828ec578763ae028b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:BVHKYF3DTR7ZLD7LKNRVZBQ62B","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VideoBrain: Learning Adaptive Frame Sampling for Long Video Understanding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Junbo Zou, Liwen Zhang, Shengjie Zhang, Weining Shen, Ziheng Huang","submitted_at":"2026-02-04T00:08:35Z","abstract_excerpt":"Long-form video understanding remains challenging for Vision-Language Models (VLMs) due to the inherent tension between computational constraints and the need to capture information distributed across thousands of frames. Existing approaches either sample frames uniformly (risking information loss) or select keyframes in a single pass (with no recovery from poor choices). We propose VideoBrain, an end-to-end framework that enables VLMs to adaptively acquire visual information through learned sampling policies. Our approach features dual complementary agents: a CLIP-based agent for semantic ret"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.04094","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.04094/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T01:04:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CNGqWcFZr91zhHq7IbS+VuNTFyAEmEWhIyLw0Hc7jDfLOHiU9AMRbrnqTBjc09sxgBXZBwXYOpsOIqT6sfwIDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T20:14:25.159062Z"},"content_sha256":"c750e837d84257bb4c74d3fe08153f64a2751dacadb01204fb35560f39131211","schema_version":"1.0","event_id":"sha256:c750e837d84257bb4c74d3fe08153f64a2751dacadb01204fb35560f39131211"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/bundle.json","state_url":"https://pith.science/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T20:14:25Z","links":{"resolver":"https://pith.science/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B","bundle":"https://pith.science/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/bundle.json","state":"https://pith.science/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BVHKYF3DTR7ZLD7LKNRVZBQ62B/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BVHKYF3DTR7ZLD7LKNRVZBQ62B","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5225f08639ec653413ae5105dd520b78b01ce4316e8c4949119f45f4a7f4164c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-04T00:08:35Z","title_canon_sha256":"704c9b4d263ee671a26dbb004028d77b1a000f12c4b90f9a90aad9159ed6c7e8"},"schema_version":"1.0","source":{"id":"2602.04094","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.04094","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"arxiv_version","alias_value":"2602.04094v2","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.04094","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_12","alias_value":"BVHKYF3DTR7Z","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_16","alias_value":"BVHKYF3DTR7ZLD7L","created_at":"2026-06-02T01:04:15Z"},{"alias_kind":"pith_short_8","alias_value":"BVHKYF3D","created_at":"2026-06-02T01:04:15Z"}],"graph_snapshots":[{"event_id":"sha256:c750e837d84257bb4c74d3fe08153f64a2751dacadb01204fb35560f39131211","target":"graph","created_at":"2026-06-02T01:04:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.04094/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Long-form video understanding remains challenging for Vision-Language Models (VLMs) due to the inherent tension between computational constraints and the need to capture information distributed across thousands of frames. Existing approaches either sample frames uniformly (risking information loss) or select keyframes in a single pass (with no recovery from poor choices). We propose VideoBrain, an end-to-end framework that enables VLMs to adaptively acquire visual information through learned sampling policies. Our approach features dual complementary agents: a CLIP-based agent for semantic ret","authors_text":"Junbo Zou, Liwen Zhang, Shengjie Zhang, Weining Shen, Ziheng Huang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-04T00:08:35Z","title":"VideoBrain: Learning Adaptive Frame Sampling for Long Video Understanding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.04094","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a5322da7c28c37f7969c3bd1e8740862778615b85978e8a828ec578763ae028b","target":"record","created_at":"2026-06-02T01:04:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5225f08639ec653413ae5105dd520b78b01ce4316e8c4949119f45f4a7f4164c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-02-04T00:08:35Z","title_canon_sha256":"704c9b4d263ee671a26dbb004028d77b1a000f12c4b90f9a90aad9159ed6c7e8"},"schema_version":"1.0","source":{"id":"2602.04094","kind":"arxiv","version":2}},"canonical_sha256":"0d4eac17639c7f958feb53635c861ed06f006223bf90db558bdbddd92c1c1d80","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0d4eac17639c7f958feb53635c861ed06f006223bf90db558bdbddd92c1c1d80","first_computed_at":"2026-06-02T01:04:15.605117Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T01:04:15.605117Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"OtrSLOPManhHhds9qU3z85ScEPy7Zs+KfLIm4FSKsSOwSJqpcEORGldNR3z2fmVtWTb1jevaW3u7ivkEhNTJAg==","signature_status":"signed_v1","signed_at":"2026-06-02T01:04:15.605664Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.04094","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a5322da7c28c37f7969c3bd1e8740862778615b85978e8a828ec578763ae028b","sha256:c750e837d84257bb4c74d3fe08153f64a2751dacadb01204fb35560f39131211"],"state_sha256":"b1af67ef25ea9e7acf0df47f0e8a5721f62e4d6e78245ad7553fc449865bf6ed"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KHwj4xNPfINAAyToI+E+n1jkKqDmOCFGyllWmGpJm1qtXdVpm6QPJZz0cHMXv+4DYPVSrDkOMEirNbJ13c0BCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T20:14:25.161118Z","bundle_sha256":"889510f1d5826f0b8782c362ae59b2b1b111617fa8ac89b1d135415289f7f1b4"}}