{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:SUMCLBSWVGNSWMC2LSX6MR36RB","short_pith_number":"pith:SUMCLBSW","canonical_record":{"source":{"id":"2606.25391","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-24T04:42:57Z","cross_cats_sorted":["cs.AI","cs.MM"],"title_canon_sha256":"7f3968acd718c472d10d43b2ba94be847c14fc5fdd5234849b08a803a04285b4","abstract_canon_sha256":"c73921d72bb9af4117c86414dfaa5ed4ef19992dbf2e30662067f71288145649"},"schema_version":"1.0"},"canonical_sha256":"9518258656a99b2b305a5cafe6477e886d9d3b23391138fa7346240370a9e392","source":{"kind":"arxiv","id":"2606.25391","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.25391","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"arxiv_version","alias_value":"2606.25391v1","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.25391","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_12","alias_value":"SUMCLBSWVGNS","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_16","alias_value":"SUMCLBSWVGNSWMC2","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_8","alias_value":"SUMCLBSW","created_at":"2026-06-25T01:18:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:SUMCLBSWVGNSWMC2LSX6MR36RB","target":"record","payload":{"canonical_record":{"source":{"id":"2606.25391","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-24T04:42:57Z","cross_cats_sorted":["cs.AI","cs.MM"],"title_canon_sha256":"7f3968acd718c472d10d43b2ba94be847c14fc5fdd5234849b08a803a04285b4","abstract_canon_sha256":"c73921d72bb9af4117c86414dfaa5ed4ef19992dbf2e30662067f71288145649"},"schema_version":"1.0"},"canonical_sha256":"9518258656a99b2b305a5cafe6477e886d9d3b23391138fa7346240370a9e392","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T01:18:04.099053Z","signature_b64":"87ly0LOvMBw6VIU+TWItGDvMesm5rYIiS+0zzRnK7eSlj5nreHeVX3diWP7/PqyzcM++H8+9APYV0U6P4OerBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9518258656a99b2b305a5cafe6477e886d9d3b23391138fa7346240370a9e392","last_reissued_at":"2026-06-25T01:18:04.098592Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T01:18:04.098592Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.25391","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-25T01:18:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fkWoqctU6XhgeTy3PBmMs6BrtmfQFYiLo8Ck71rh+iFB+SvrgRSPm3MCQOK5k1cmgnMLC4crwT4gm9/viKXyBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:14:02.514977Z"},"content_sha256":"e77832a77e78c77ffc96cde6bf183b28856d50d01387116aeba4e35d2e073f17","schema_version":"1.0","event_id":"sha256:e77832a77e78c77ffc96cde6bf183b28856d50d01387116aeba4e35d2e073f17"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:SUMCLBSWVGNSWMC2LSX6MR36RB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"From Sounds to Scenes: A Benchmark for Evaluating Context-Aware Auditory Scene Understanding in Large Audio Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.MM"],"primary_cat":"cs.SD","authors_text":"Amir M. Rahmani, Henry Peng Zou, Hoang H Nguyen, Honghui Xu, Kazi Shaharair Sharif, Pengfei Zhang, Pinxin Liu, Wenjun Huang, Yutong Song","submitted_at":"2026-06-24T04:42:57Z","abstract_excerpt":"Recent Large Audio Language Models (LALMs) have achieved remarkable progress in audio perceptual tasks across individual acoustic layers, including speech, sound, and music. However, existing benchmarks predominantly evaluate these layers in isolation, overlooking the complex contextual relationships that arise when multiple acoustic sources co-occur in real-world auditory scenes. Real-world auditory interpretation requires Context-Aware Auditory Scene Understanding (CASU): the ability to comprehend the holistic scene by integrating sound layers. To evaluate this capability, we introduce the C"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.25391","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.25391/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-25T01:18:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"V9oQYnbUc5kjMpzV6FV2dkUkDtyYPRIYLn+PvC2V6tqMW6b519/AtInEkAbMZGmKs+MrPZMdwMKTs/UUShiMBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:14:02.515382Z"},"content_sha256":"bbeef1cfb5960aa4789d439bb2eeaae6a3230f51eae1d7629a0bd621d40656df","schema_version":"1.0","event_id":"sha256:bbeef1cfb5960aa4789d439bb2eeaae6a3230f51eae1d7629a0bd621d40656df"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/bundle.json","state_url":"https://pith.science/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T21:14:02Z","links":{"resolver":"https://pith.science/pith/SUMCLBSWVGNSWMC2LSX6MR36RB","bundle":"https://pith.science/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/bundle.json","state":"https://pith.science/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SUMCLBSWVGNSWMC2LSX6MR36RB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SUMCLBSWVGNSWMC2LSX6MR36RB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c73921d72bb9af4117c86414dfaa5ed4ef19992dbf2e30662067f71288145649","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-24T04:42:57Z","title_canon_sha256":"7f3968acd718c472d10d43b2ba94be847c14fc5fdd5234849b08a803a04285b4"},"schema_version":"1.0","source":{"id":"2606.25391","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.25391","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"arxiv_version","alias_value":"2606.25391v1","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.25391","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_12","alias_value":"SUMCLBSWVGNS","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_16","alias_value":"SUMCLBSWVGNSWMC2","created_at":"2026-06-25T01:18:04Z"},{"alias_kind":"pith_short_8","alias_value":"SUMCLBSW","created_at":"2026-06-25T01:18:04Z"}],"graph_snapshots":[{"event_id":"sha256:bbeef1cfb5960aa4789d439bb2eeaae6a3230f51eae1d7629a0bd621d40656df","target":"graph","created_at":"2026-06-25T01:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.25391/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent Large Audio Language Models (LALMs) have achieved remarkable progress in audio perceptual tasks across individual acoustic layers, including speech, sound, and music. However, existing benchmarks predominantly evaluate these layers in isolation, overlooking the complex contextual relationships that arise when multiple acoustic sources co-occur in real-world auditory scenes. Real-world auditory interpretation requires Context-Aware Auditory Scene Understanding (CASU): the ability to comprehend the holistic scene by integrating sound layers. To evaluate this capability, we introduce the C","authors_text":"Amir M. Rahmani, Henry Peng Zou, Hoang H Nguyen, Honghui Xu, Kazi Shaharair Sharif, Pengfei Zhang, Pinxin Liu, Wenjun Huang, Yutong Song","cross_cats":["cs.AI","cs.MM"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-24T04:42:57Z","title":"From Sounds to Scenes: A Benchmark for Evaluating Context-Aware Auditory Scene Understanding in Large Audio Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.25391","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e77832a77e78c77ffc96cde6bf183b28856d50d01387116aeba4e35d2e073f17","target":"record","created_at":"2026-06-25T01:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c73921d72bb9af4117c86414dfaa5ed4ef19992dbf2e30662067f71288145649","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-06-24T04:42:57Z","title_canon_sha256":"7f3968acd718c472d10d43b2ba94be847c14fc5fdd5234849b08a803a04285b4"},"schema_version":"1.0","source":{"id":"2606.25391","kind":"arxiv","version":1}},"canonical_sha256":"9518258656a99b2b305a5cafe6477e886d9d3b23391138fa7346240370a9e392","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9518258656a99b2b305a5cafe6477e886d9d3b23391138fa7346240370a9e392","first_computed_at":"2026-06-25T01:18:04.098592Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-25T01:18:04.098592Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"87ly0LOvMBw6VIU+TWItGDvMesm5rYIiS+0zzRnK7eSlj5nreHeVX3diWP7/PqyzcM++H8+9APYV0U6P4OerBQ==","signature_status":"signed_v1","signed_at":"2026-06-25T01:18:04.099053Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.25391","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e77832a77e78c77ffc96cde6bf183b28856d50d01387116aeba4e35d2e073f17","sha256:bbeef1cfb5960aa4789d439bb2eeaae6a3230f51eae1d7629a0bd621d40656df"],"state_sha256":"78c91060045a42a5251f9e36fe907868a8ed74dc0d15436e7cb312f84282dacf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lUC+mG4kZHfJq1/mgo7vK5S1wuUKQrZVOybcfz2DmlnvdZHXYW7HuB/EWzuPecvWFxmmiaoad8HTpWLxWWALCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T21:14:02.519399Z","bundle_sha256":"1f3a183bbd9d12139d5c30f2fa8338ce3f37c672c26f22e5aec2c0e35754b808"}}