{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:5NDZNTVFANVGO2CKEN4H5XLTPI","short_pith_number":"pith:5NDZNTVF","canonical_record":{"source":{"id":"2605.22158","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"070c18343e64acde7c7d99523d312913d13a5cbcab3ee59ee35edd2a609476dc","abstract_canon_sha256":"eefd483eff8220874e32c8a0a945b631797cf7f89132fb51c2a6fa0c3603b90c"},"schema_version":"1.0"},"canonical_sha256":"eb4796cea5036a67684a23787edd737a21f6b5932864b720c8ea5a0376c06a3a","source":{"kind":"arxiv","id":"2605.22158","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22158","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22158v1","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22158","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_12","alias_value":"5NDZNTVFANVG","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_16","alias_value":"5NDZNTVFANVGO2CK","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_8","alias_value":"5NDZNTVF","created_at":"2026-05-22T01:04:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:5NDZNTVFANVGO2CKEN4H5XLTPI","target":"record","payload":{"canonical_record":{"source":{"id":"2605.22158","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"070c18343e64acde7c7d99523d312913d13a5cbcab3ee59ee35edd2a609476dc","abstract_canon_sha256":"eefd483eff8220874e32c8a0a945b631797cf7f89132fb51c2a6fa0c3603b90c"},"schema_version":"1.0"},"canonical_sha256":"eb4796cea5036a67684a23787edd737a21f6b5932864b720c8ea5a0376c06a3a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:29.109493Z","signature_b64":"MuEggdUZzZ3/lk0N+cOk8MwlS2m9amUtoZpMxJB2T0LluO96tBD7V4LIAUHLnU/lZ/NDpn8u+kuXs00A7Xl7Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eb4796cea5036a67684a23787edd737a21f6b5932864b720c8ea5a0376c06a3a","last_reissued_at":"2026-05-22T01:04:29.108771Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:29.108771Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.22158","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EGF+xIPl0IFD+ehAfL764GmNa7gZYu+z0I60LrFIGNmCnG+fEwZB+27tvVz73hmXbG6eP1LM9/zos+mNlC4aBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T14:25:55.740550Z"},"content_sha256":"735beaec8cf07cf51dd3432bc9209376f0763e4888e6cf35da2e510aab343c11","schema_version":"1.0","event_id":"sha256:735beaec8cf07cf51dd3432bc9209376f0763e4888e6cf35da2e510aab343c11"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:5NDZNTVFANVGO2CKEN4H5XLTPI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ST-SimDiff: Balancing Spatiotemporal Similarity and Difference for Efficient Video Understanding with MLLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.AI","authors_text":"Bingjun Luo, Chaoqi Chen, Tony Wang, Xinpeng Ding","submitted_at":"2026-05-21T08:27:15Z","abstract_excerpt":"Multimodal Large Language Models (MLLMs) face significant computational overhead when processing long videos due to the massive number of visual tokens required. To improve efficiency, existing methods primarily reduce redundancy by pruning or merging tokens based on importance or similarity. However, these approaches largely overlook a critical dimension of video content, i.e., changes and turning points, and they lack a collaborative model for spatio-temporal relationships. To address this, we propose a new perspective: similarity is for identifying redundancy, while difference is for captur"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22158","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22158/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C0ifOoU7/54VnEm8bsdF3kWjaXTKOZY+wva4izwYanMXtfVLzc4DyscA7rKokf2BaO75ME6pQ0oeJ/Gh6TGQDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T14:25:55.741282Z"},"content_sha256":"1250dd02ed3458694702b1f5b02b1e4035a2ea7e8d78b3c0b5c5f60a884a11c0","schema_version":"1.0","event_id":"sha256:1250dd02ed3458694702b1f5b02b1e4035a2ea7e8d78b3c0b5c5f60a884a11c0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/bundle.json","state_url":"https://pith.science/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T14:25:55Z","links":{"resolver":"https://pith.science/pith/5NDZNTVFANVGO2CKEN4H5XLTPI","bundle":"https://pith.science/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/bundle.json","state":"https://pith.science/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5NDZNTVFANVGO2CKEN4H5XLTPI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5NDZNTVFANVGO2CKEN4H5XLTPI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"eefd483eff8220874e32c8a0a945b631797cf7f89132fb51c2a6fa0c3603b90c","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15Z","title_canon_sha256":"070c18343e64acde7c7d99523d312913d13a5cbcab3ee59ee35edd2a609476dc"},"schema_version":"1.0","source":{"id":"2605.22158","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22158","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22158v1","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22158","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_12","alias_value":"5NDZNTVFANVG","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_16","alias_value":"5NDZNTVFANVGO2CK","created_at":"2026-05-22T01:04:29Z"},{"alias_kind":"pith_short_8","alias_value":"5NDZNTVF","created_at":"2026-05-22T01:04:29Z"}],"graph_snapshots":[{"event_id":"sha256:1250dd02ed3458694702b1f5b02b1e4035a2ea7e8d78b3c0b5c5f60a884a11c0","target":"graph","created_at":"2026-05-22T01:04:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.22158/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multimodal Large Language Models (MLLMs) face significant computational overhead when processing long videos due to the massive number of visual tokens required. To improve efficiency, existing methods primarily reduce redundancy by pruning or merging tokens based on importance or similarity. However, these approaches largely overlook a critical dimension of video content, i.e., changes and turning points, and they lack a collaborative model for spatio-temporal relationships. To address this, we propose a new perspective: similarity is for identifying redundancy, while difference is for captur","authors_text":"Bingjun Luo, Chaoqi Chen, Tony Wang, Xinpeng Ding","cross_cats":["cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15Z","title":"ST-SimDiff: Balancing Spatiotemporal Similarity and Difference for Efficient Video Understanding with MLLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22158","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:735beaec8cf07cf51dd3432bc9209376f0763e4888e6cf35da2e510aab343c11","target":"record","created_at":"2026-05-22T01:04:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"eefd483eff8220874e32c8a0a945b631797cf7f89132fb51c2a6fa0c3603b90c","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-21T08:27:15Z","title_canon_sha256":"070c18343e64acde7c7d99523d312913d13a5cbcab3ee59ee35edd2a609476dc"},"schema_version":"1.0","source":{"id":"2605.22158","kind":"arxiv","version":1}},"canonical_sha256":"eb4796cea5036a67684a23787edd737a21f6b5932864b720c8ea5a0376c06a3a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"eb4796cea5036a67684a23787edd737a21f6b5932864b720c8ea5a0376c06a3a","first_computed_at":"2026-05-22T01:04:29.108771Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:29.108771Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MuEggdUZzZ3/lk0N+cOk8MwlS2m9amUtoZpMxJB2T0LluO96tBD7V4LIAUHLnU/lZ/NDpn8u+kuXs00A7Xl7Bw==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:29.109493Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.22158","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:735beaec8cf07cf51dd3432bc9209376f0763e4888e6cf35da2e510aab343c11","sha256:1250dd02ed3458694702b1f5b02b1e4035a2ea7e8d78b3c0b5c5f60a884a11c0"],"state_sha256":"fbca1f743a2643ec89eb735a5573aab0cb632ee69eb5bf1cc268d853988b0e04"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"THKftMSEQjZWMkQlZ7WYXlsCeVzcMGjast3MHvVdSTWzjSRmjwJaIaJGydgIQc9Jxx0DkEIgYZiwtI+K3wTTAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T14:25:55.746380Z","bundle_sha256":"059a9de1a66047eba0c5c0e01b755c32711bef71776c1cc20a6366dadf738301"}}