{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:BIYHPIZJKOKBQT5PDSH2QDVCHT","short_pith_number":"pith:BIYHPIZJ","canonical_record":{"source":{"id":"2606.07264","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T13:39:39Z","cross_cats_sorted":[],"title_canon_sha256":"f44d5e139e335f91911c68992959b5f5752ed7ff3d4019e7b3c0c8dc771c8493","abstract_canon_sha256":"0a0fb994699aa99cc8747700ab6d3bf075dafc2de91cefaf2f3d05cb60f8f7a8"},"schema_version":"1.0"},"canonical_sha256":"0a3077a3295394184faf1c8fa80ea23cc52933879b4b207f78c7fed0a8d162c7","source":{"kind":"arxiv","id":"2606.07264","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07264","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07264v1","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07264","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_12","alias_value":"BIYHPIZJKOKB","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_16","alias_value":"BIYHPIZJKOKBQT5P","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_8","alias_value":"BIYHPIZJ","created_at":"2026-06-08T01:05:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:BIYHPIZJKOKBQT5PDSH2QDVCHT","target":"record","payload":{"canonical_record":{"source":{"id":"2606.07264","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T13:39:39Z","cross_cats_sorted":[],"title_canon_sha256":"f44d5e139e335f91911c68992959b5f5752ed7ff3d4019e7b3c0c8dc771c8493","abstract_canon_sha256":"0a0fb994699aa99cc8747700ab6d3bf075dafc2de91cefaf2f3d05cb60f8f7a8"},"schema_version":"1.0"},"canonical_sha256":"0a3077a3295394184faf1c8fa80ea23cc52933879b4b207f78c7fed0a8d162c7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:05:16.616201Z","signature_b64":"sZ1XYd4+MgI/gXy7hr5Oul8jqMN6KiSgfycooOKKBxeKdD8RPrjAgl32t7EL+2sXg2phN7VR3SpY8W9c/ZDMAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0a3077a3295394184faf1c8fa80ea23cc52933879b4b207f78c7fed0a8d162c7","last_reissued_at":"2026-06-08T01:05:16.615242Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:05:16.615242Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.07264","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:05:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N8Bl+DG5z8lhOZntZid+JJ3D10W8jPFtcjPChGaKZ1Wxnnl+xI9vd3yCccwH4vMREphKjXWsNayFsFTJo7U0CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T20:29:40.862646Z"},"content_sha256":"db74878b186d593d1809169d8fbaf64d8c69d3e1ad4d81c8fcba0447682680a5","schema_version":"1.0","event_id":"sha256:db74878b186d593d1809169d8fbaf64d8c69d3e1ad4d81c8fcba0447682680a5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:BIYHPIZJKOKBQT5PDSH2QDVCHT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VISA: A Visual Information Strengthened Audio-Reasoning System for the Interspeech 2026 ARC Agent Track","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Bohan Li, Jian Gao, Jing Peng, Kai Yu, Shuai Fan, Tao Liu, Wenming Tu, Xie Chen, Yanru Huo, Yixuan Wang, Zilong Zheng, Ziyang Ma","submitted_at":"2026-06-05T13:39:39Z","abstract_excerpt":"Audio reasoning requires multi-step, evidence-grounded inference over temporally dynamic and acoustically mixed signals, exceeding conventional perception tasks such as ASR or captioning. We present VISA, our submission to the Interspeech 2026 Audio Reasoning Challenge (Agent Track), evaluated via the MMAR Rubrics for correctness and reasoning quality. Under a \"LALM as a Tool\" paradigm, VISA strengthens large audio language models with auxiliary multi-modal evidence while avoiding heavy orchestration. The system integrates three components: multi-modal feature extraction for complementary audi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07264","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.07264/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:05:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5Brryc6OCunjRYXhwsF9v9Wybyig8WUthcS+9KXQrxhNZ9blEFqhRvM1Dpp6c4mW76MoprRzXzIYXiXFc3nlDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T20:29:40.863454Z"},"content_sha256":"14a1bd028e377188568bca040758decbe75b1fbfbd8bf25523b56c8e270d123c","schema_version":"1.0","event_id":"sha256:14a1bd028e377188568bca040758decbe75b1fbfbd8bf25523b56c8e270d123c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/bundle.json","state_url":"https://pith.science/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T20:29:40Z","links":{"resolver":"https://pith.science/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT","bundle":"https://pith.science/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/bundle.json","state":"https://pith.science/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BIYHPIZJKOKBQT5PDSH2QDVCHT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BIYHPIZJKOKBQT5PDSH2QDVCHT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0a0fb994699aa99cc8747700ab6d3bf075dafc2de91cefaf2f3d05cb60f8f7a8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T13:39:39Z","title_canon_sha256":"f44d5e139e335f91911c68992959b5f5752ed7ff3d4019e7b3c0c8dc771c8493"},"schema_version":"1.0","source":{"id":"2606.07264","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07264","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07264v1","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07264","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_12","alias_value":"BIYHPIZJKOKB","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_16","alias_value":"BIYHPIZJKOKBQT5P","created_at":"2026-06-08T01:05:16Z"},{"alias_kind":"pith_short_8","alias_value":"BIYHPIZJ","created_at":"2026-06-08T01:05:16Z"}],"graph_snapshots":[{"event_id":"sha256:14a1bd028e377188568bca040758decbe75b1fbfbd8bf25523b56c8e270d123c","target":"graph","created_at":"2026-06-08T01:05:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07264/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Audio reasoning requires multi-step, evidence-grounded inference over temporally dynamic and acoustically mixed signals, exceeding conventional perception tasks such as ASR or captioning. We present VISA, our submission to the Interspeech 2026 Audio Reasoning Challenge (Agent Track), evaluated via the MMAR Rubrics for correctness and reasoning quality. Under a \"LALM as a Tool\" paradigm, VISA strengthens large audio language models with auxiliary multi-modal evidence while avoiding heavy orchestration. The system integrates three components: multi-modal feature extraction for complementary audi","authors_text":"Bohan Li, Jian Gao, Jing Peng, Kai Yu, Shuai Fan, Tao Liu, Wenming Tu, Xie Chen, Yanru Huo, Yixuan Wang, Zilong Zheng, Ziyang Ma","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T13:39:39Z","title":"VISA: A Visual Information Strengthened Audio-Reasoning System for the Interspeech 2026 ARC Agent Track"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07264","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:db74878b186d593d1809169d8fbaf64d8c69d3e1ad4d81c8fcba0447682680a5","target":"record","created_at":"2026-06-08T01:05:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0a0fb994699aa99cc8747700ab6d3bf075dafc2de91cefaf2f3d05cb60f8f7a8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T13:39:39Z","title_canon_sha256":"f44d5e139e335f91911c68992959b5f5752ed7ff3d4019e7b3c0c8dc771c8493"},"schema_version":"1.0","source":{"id":"2606.07264","kind":"arxiv","version":1}},"canonical_sha256":"0a3077a3295394184faf1c8fa80ea23cc52933879b4b207f78c7fed0a8d162c7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0a3077a3295394184faf1c8fa80ea23cc52933879b4b207f78c7fed0a8d162c7","first_computed_at":"2026-06-08T01:05:16.615242Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:05:16.615242Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sZ1XYd4+MgI/gXy7hr5Oul8jqMN6KiSgfycooOKKBxeKdD8RPrjAgl32t7EL+2sXg2phN7VR3SpY8W9c/ZDMAQ==","signature_status":"signed_v1","signed_at":"2026-06-08T01:05:16.616201Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07264","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:db74878b186d593d1809169d8fbaf64d8c69d3e1ad4d81c8fcba0447682680a5","sha256:14a1bd028e377188568bca040758decbe75b1fbfbd8bf25523b56c8e270d123c"],"state_sha256":"ebb4f430a6dc3426565490a789c4cb2b4244c4df8158455b322eac16f552508b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oFzpZwyvkvDR6HOG9ZMWMLugjfLFJV+fVer8ytLEnKNMYp/gANaYL7WY+hWplo6Nx5Qv/aMnJWou4eadTg5PDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T20:29:40.867513Z","bundle_sha256":"1cfb75aa23cd5718c6ad1227c6915ae2ed9beff58d4aa9cbc5c0c8918b7b32c3"}}