{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:USFK5FKNNWCJNRFI2TAMB26TKW","short_pith_number":"pith:USFK5FKN","canonical_record":{"source":{"id":"2605.12725","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T20:29:49Z","cross_cats_sorted":[],"title_canon_sha256":"d998bcd8018cc4e0b9a433f571ccb287d1fce6cbde7b10b0703d703b5c831743","abstract_canon_sha256":"471d9ea74ff0f314fee16cdba331d8c1dab9a82002292f91316c9f8033fe1cdb"},"schema_version":"1.0"},"canonical_sha256":"a48aae954d6d8496c4a8d4c0c0ebd3558ff2d098387d261e8428d23b3663cbc7","source":{"kind":"arxiv","id":"2605.12725","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12725","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12725v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12725","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"USFK5FKNNWCJ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"USFK5FKNNWCJNRFI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"USFK5FKN","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:USFK5FKNNWCJNRFI2TAMB26TKW","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12725","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T20:29:49Z","cross_cats_sorted":[],"title_canon_sha256":"d998bcd8018cc4e0b9a433f571ccb287d1fce6cbde7b10b0703d703b5c831743","abstract_canon_sha256":"471d9ea74ff0f314fee16cdba331d8c1dab9a82002292f91316c9f8033fe1cdb"},"schema_version":"1.0"},"canonical_sha256":"a48aae954d6d8496c4a8d4c0c0ebd3558ff2d098387d261e8428d23b3663cbc7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:49.336930Z","signature_b64":"Q9ft6mDPXh8Ak38gVPGfG0+wU8tDk5PrPnea1dyN+IYLWtdrsD/mCDMt37iLh085SjQQxUHaHOoYT4hvhKcSDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a48aae954d6d8496c4a8d4c0c0ebd3558ff2d098387d261e8428d23b3663cbc7","last_reissued_at":"2026-05-18T03:09:49.336088Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:49.336088Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12725","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FoarzUf3jLVRefgXhRBnZC3MQBk37Pa8uNJJwzAIQVAJgJwkw2GeCs1blVA3DoVjIInzBOj4NeAzU+K+Jf//AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:00:49.261548Z"},"content_sha256":"19c6ae499174bcbe22325be8c30f4bb942dcebf2076d98137dc9a2a55605e926","schema_version":"1.0","event_id":"sha256:19c6ae499174bcbe22325be8c30f4bb942dcebf2076d98137dc9a2a55605e926"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:USFK5FKNNWCJNRFI2TAMB26TKW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Anoop Cherian, Furkan Mumcu, Michael J. Jones, Yasin Yilmaz","submitted_at":"2026-05-12T20:29:49Z","abstract_excerpt":"Recent video anomaly detection research has expanded rapidly with an emphasis on general models of normality intended to work across many different scenes. While this focus has led to improvements in scalability and multi-scene generalization, it has also shifted the field away from modeling the scene-specific and context-dependent nature of normal behavior. Contemporary approaches frequently rely on video-level weak supervision and opaque pretrained representations from multi-modal large language models (MLLMs), which encourage models to respond to familiar semantic anomaly categories rather "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"meaningful progress in VAD requires renewed focus on single-scene, spatially-aware, and explainable formulations that capture the nuanced structure of normality within individual environments.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"Real-world video anomaly detection is typically performed within a single scene where normality is determined by local geometry, semantics, and activity patterns.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Video anomaly detection is misframed by multi-scene LLM models that reduce the task to semantic action recognition instead of capturing local scene normality, requiring a return to single-scene spatially-aware methods.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"2c5818fb05d9dc59e9b13a0f724c11e613aeaefb89dd61e7629c4ec88f5c5ce8"},"source":{"id":"2605.12725","kind":"arxiv","version":1},"verdict":{"id":"2b351901-b27d-4fac-ad09-97f90e241bea","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:43:59.215636Z","strongest_claim":"meaningful progress in VAD requires renewed focus on single-scene, spatially-aware, and explainable formulations that capture the nuanced structure of normality within individual environments.","one_line_summary":"Video anomaly detection is misframed by multi-scene LLM models that reduce the task to semantic action recognition instead of capturing local scene normality, requiring a return to single-scene spatially-aware methods.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"Real-world video anomaly detection is typically performed within a single scene where normality is determined by local geometry, semantics, and activity patterns.","pith_extraction_headline":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality."},"references":{"count":57,"sample":[{"doi":"","year":2024,"title":"A coarse-to-fine pseudo-labeling (c2fpl) framework for unsupervised video anomaly detection","work_id":"65c6beda-e4a7-4179-a2ea-6b3d5df1ae10","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Collab- orative learning of anomalies with privacy (clap) for unsupervised video anomaly detection: A new base- line","work_id":"369ca402-9cd5-4a16-b2a7-d6613712d4eb","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Advancing video anomaly detection: A concise review and a new dataset","work_id":"2570a114-d738-416e-920c-02c9d50e65e5","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Prompt-enhanced multiple instance learning for weakly supervised video anomaly detec- tion","work_id":"091ec88f-6e13-4ce7-a5e8-ebcdff45e473","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Generalizing single-frame supervi- sion to event-level understanding for video anomaly detection","work_id":"c6fccec1-eec2-4ce0-938c-c20d3a847120","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":57,"snapshot_sha256":"aec0141b290c619d592b448693ebdf8627ca0e6ae4c8a3eeb14c7046092a48eb","internal_anchors":1},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"2b351901-b27d-4fac-ad09-97f90e241bea"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A7BNLK9olEyYyt5D3NFM2O/MxkTE3PiRWV9xP8dMwqrBIq/8JYtNdz+7qBxCcfuZ5ouSC6Ir3OzWSIMEhol0AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:00:49.262608Z"},"content_sha256":"04f0d769159eeea592e75b68aa7c42856a28fcb3ce90a0a78b2c98f2ce4b1cd5","schema_version":"1.0","event_id":"sha256:04f0d769159eeea592e75b68aa7c42856a28fcb3ce90a0a78b2c98f2ce4b1cd5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/USFK5FKNNWCJNRFI2TAMB26TKW/bundle.json","state_url":"https://pith.science/pith/USFK5FKNNWCJNRFI2TAMB26TKW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/USFK5FKNNWCJNRFI2TAMB26TKW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T13:00:49Z","links":{"resolver":"https://pith.science/pith/USFK5FKNNWCJNRFI2TAMB26TKW","bundle":"https://pith.science/pith/USFK5FKNNWCJNRFI2TAMB26TKW/bundle.json","state":"https://pith.science/pith/USFK5FKNNWCJNRFI2TAMB26TKW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/USFK5FKNNWCJNRFI2TAMB26TKW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:USFK5FKNNWCJNRFI2TAMB26TKW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"471d9ea74ff0f314fee16cdba331d8c1dab9a82002292f91316c9f8033fe1cdb","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T20:29:49Z","title_canon_sha256":"d998bcd8018cc4e0b9a433f571ccb287d1fce6cbde7b10b0703d703b5c831743"},"schema_version":"1.0","source":{"id":"2605.12725","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12725","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12725v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12725","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"USFK5FKNNWCJ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"USFK5FKNNWCJNRFI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"USFK5FKN","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:04f0d769159eeea592e75b68aa7c42856a28fcb3ce90a0a78b2c98f2ce4b1cd5","target":"graph","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"meaningful progress in VAD requires renewed focus on single-scene, spatially-aware, and explainable formulations that capture the nuanced structure of normality within individual environments."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"Real-world video anomaly detection is typically performed within a single scene where normality is determined by local geometry, semantics, and activity patterns."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Video anomaly detection is misframed by multi-scene LLM models that reduce the task to semantic action recognition instead of capturing local scene normality, requiring a return to single-scene spatially-aware methods."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality."}],"snapshot_sha256":"2c5818fb05d9dc59e9b13a0f724c11e613aeaefb89dd61e7629c4ec88f5c5ce8"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent video anomaly detection research has expanded rapidly with an emphasis on general models of normality intended to work across many different scenes. While this focus has led to improvements in scalability and multi-scene generalization, it has also shifted the field away from modeling the scene-specific and context-dependent nature of normal behavior. Contemporary approaches frequently rely on video-level weak supervision and opaque pretrained representations from multi-modal large language models (MLLMs), which encourage models to respond to familiar semantic anomaly categories rather ","authors_text":"Anoop Cherian, Furkan Mumcu, Michael J. Jones, Yasin Yilmaz","cross_cats":[],"headline":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T20:29:49Z","title":"Is Video Anomaly Detection Misframed? Evidence from LLM-Based and Multi-Scene Models"},"references":{"count":57,"internal_anchors":1,"resolved_work":57,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"A coarse-to-fine pseudo-labeling (c2fpl) framework for unsupervised video anomaly detection","work_id":"65c6beda-e4a7-4179-a2ea-6b3d5df1ae10","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Collab- orative learning of anomalies with privacy (clap) for unsupervised video anomaly detection: A new base- line","work_id":"369ca402-9cd5-4a16-b2a7-d6613712d4eb","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Advancing video anomaly detection: A concise review and a new dataset","work_id":"2570a114-d738-416e-920c-02c9d50e65e5","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Prompt-enhanced multiple instance learning for weakly supervised video anomaly detec- tion","work_id":"091ec88f-6e13-4ce7-a5e8-ebcdff45e473","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Generalizing single-frame supervi- sion to event-level understanding for video anomaly detection","work_id":"c6fccec1-eec2-4ce0-938c-c20d3a847120","year":2025}],"snapshot_sha256":"aec0141b290c619d592b448693ebdf8627ca0e6ae4c8a3eeb14c7046092a48eb"},"source":{"id":"2605.12725","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:43:59.215636Z","id":"2b351901-b27d-4fac-ad09-97f90e241bea","model_set":{"reader":"grok-4.3"},"one_line_summary":"Video anomaly detection is misframed by multi-scene LLM models that reduce the task to semantic action recognition instead of capturing local scene normality, requiring a return to single-scene spatially-aware methods.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Video anomaly detection research has shifted to multi-scene LLM models that reduce the task to semantic category recognition rather than scene-specific normality.","strongest_claim":"meaningful progress in VAD requires renewed focus on single-scene, spatially-aware, and explainable formulations that capture the nuanced structure of normality within individual environments.","weakest_assumption":"Real-world video anomaly detection is typically performed within a single scene where normality is determined by local geometry, semantics, and activity patterns."}},"verdict_id":"2b351901-b27d-4fac-ad09-97f90e241bea"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:19c6ae499174bcbe22325be8c30f4bb942dcebf2076d98137dc9a2a55605e926","target":"record","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"471d9ea74ff0f314fee16cdba331d8c1dab9a82002292f91316c9f8033fe1cdb","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T20:29:49Z","title_canon_sha256":"d998bcd8018cc4e0b9a433f571ccb287d1fce6cbde7b10b0703d703b5c831743"},"schema_version":"1.0","source":{"id":"2605.12725","kind":"arxiv","version":1}},"canonical_sha256":"a48aae954d6d8496c4a8d4c0c0ebd3558ff2d098387d261e8428d23b3663cbc7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a48aae954d6d8496c4a8d4c0c0ebd3558ff2d098387d261e8428d23b3663cbc7","first_computed_at":"2026-05-18T03:09:49.336088Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:49.336088Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Q9ft6mDPXh8Ak38gVPGfG0+wU8tDk5PrPnea1dyN+IYLWtdrsD/mCDMt37iLh085SjQQxUHaHOoYT4hvhKcSDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:49.336930Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12725","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:19c6ae499174bcbe22325be8c30f4bb942dcebf2076d98137dc9a2a55605e926","sha256:04f0d769159eeea592e75b68aa7c42856a28fcb3ce90a0a78b2c98f2ce4b1cd5"],"state_sha256":"140036b8e94b72af8eb53a20502623f0a7c2e88a32699b7231d2603de07153a0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jw4ESzkKq4Kyo9Iqf3IkAtUVnA0xfk/kVm2SZXqBq/biKyZ6CtkYsV9YkBEPI+hfk/bboDiR9PrQoSnF1YjjAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T13:00:49.267481Z","bundle_sha256":"ccda607f31dbb8b347527c6bc00a303beeaa8e3c9d64eab1673c534813e85de4"}}