{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:S7FX2JXGDFOHPNI7KOH4S73NQG","short_pith_number":"pith:S7FX2JXG","canonical_record":{"source":{"id":"2605.21008","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T10:44:56Z","cross_cats_sorted":[],"title_canon_sha256":"ae07026eebf6a5905216e8d8f945313520509589cea2ef9404e267e9691e4cf8","abstract_canon_sha256":"3d65eb3cf67189952122d7488d7c4f749cde137457b8bc10268847474a5092a2"},"schema_version":"1.0"},"canonical_sha256":"97cb7d26e6195c77b51f538fc97f6d81853f9bf3f3eee05f83e3a2c1ac531f8c","source":{"kind":"arxiv","id":"2605.21008","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21008","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21008v1","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21008","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_12","alias_value":"S7FX2JXGDFOH","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_16","alias_value":"S7FX2JXGDFOHPNI7","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_8","alias_value":"S7FX2JXG","created_at":"2026-05-21T01:05:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:S7FX2JXGDFOHPNI7KOH4S73NQG","target":"record","payload":{"canonical_record":{"source":{"id":"2605.21008","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T10:44:56Z","cross_cats_sorted":[],"title_canon_sha256":"ae07026eebf6a5905216e8d8f945313520509589cea2ef9404e267e9691e4cf8","abstract_canon_sha256":"3d65eb3cf67189952122d7488d7c4f749cde137457b8bc10268847474a5092a2"},"schema_version":"1.0"},"canonical_sha256":"97cb7d26e6195c77b51f538fc97f6d81853f9bf3f3eee05f83e3a2c1ac531f8c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:32.129114Z","signature_b64":"zxMFcS/gyVY/NYdEaPAmuYsm9B9JcT2vn+P4nCh538MyQ6fqhqSvnFHbE0O3W5C9OphinBRhnqOUGfeR8W9XCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"97cb7d26e6195c77b51f538fc97f6d81853f9bf3f3eee05f83e3a2c1ac531f8c","last_reissued_at":"2026-05-21T01:05:32.125091Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:32.125091Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.21008","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sn6mKx77WuO6a7slJyQpOxj6bE79ET5KVM5Ns4KhRDaiQ1mm8iokNwYPsIRU6jiL2eFmKgGosFWxSXukG/cSDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T02:38:52.608157Z"},"content_sha256":"d37605841adb16f8ed02053baee4ffa52c5af957d5516e4a7e9a016a73c46a3b","schema_version":"1.0","event_id":"sha256:d37605841adb16f8ed02053baee4ffa52c5af957d5516e4a7e9a016a73c46a3b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:S7FX2JXGDFOHPNI7KOH4S73NQG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Survey of Audio Reasoning in Multimodal Foundation Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Daxin Tan, Dingdong Wang, Guan-Ting Lin, Han Shi, Irwin King, Jiaya Jia, Jing Xiong, Jingyao Li, Qiyong Zheng, Wenqian Cui, Zhihan Guo","submitted_at":"2026-05-20T10:44:56Z","abstract_excerpt":"Reasoning has become a defining capability of modern foundation models, yet its development in the audio modality remains limited. Audio poses challenges that are distinct from those of text and vision. It is continuous, temporally dense, and contains linguistic, paralinguistic, and environmental information at multiple time scales. As a result, audio reasoning models must align acoustic signals with the discrete semantic space of large language models, while still preserving fine-grained information needed for reliable inference. Progress is also limited by three major obstacles: the scarcity"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21008","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21008/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EnBQJQGvmBl7lzYHeUoPGC1A9+qOx7wppVswGmbMxl1eyfpbywZlW4EAGcTnO2wCscnpwLyIWsU+BRn/aqWpDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T02:38:52.608732Z"},"content_sha256":"38e22ae4a27b4a65f60fee399f2f0d050be4ba94df9f939ff6a8c9b71a853ae3","schema_version":"1.0","event_id":"sha256:38e22ae4a27b4a65f60fee399f2f0d050be4ba94df9f939ff6a8c9b71a853ae3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/bundle.json","state_url":"https://pith.science/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T02:38:52Z","links":{"resolver":"https://pith.science/pith/S7FX2JXGDFOHPNI7KOH4S73NQG","bundle":"https://pith.science/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/bundle.json","state":"https://pith.science/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/S7FX2JXGDFOHPNI7KOH4S73NQG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:S7FX2JXGDFOHPNI7KOH4S73NQG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3d65eb3cf67189952122d7488d7c4f749cde137457b8bc10268847474a5092a2","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T10:44:56Z","title_canon_sha256":"ae07026eebf6a5905216e8d8f945313520509589cea2ef9404e267e9691e4cf8"},"schema_version":"1.0","source":{"id":"2605.21008","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21008","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21008v1","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21008","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_12","alias_value":"S7FX2JXGDFOH","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_16","alias_value":"S7FX2JXGDFOHPNI7","created_at":"2026-05-21T01:05:32Z"},{"alias_kind":"pith_short_8","alias_value":"S7FX2JXG","created_at":"2026-05-21T01:05:32Z"}],"graph_snapshots":[{"event_id":"sha256:38e22ae4a27b4a65f60fee399f2f0d050be4ba94df9f939ff6a8c9b71a853ae3","target":"graph","created_at":"2026-05-21T01:05:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.21008/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reasoning has become a defining capability of modern foundation models, yet its development in the audio modality remains limited. Audio poses challenges that are distinct from those of text and vision. It is continuous, temporally dense, and contains linguistic, paralinguistic, and environmental information at multiple time scales. As a result, audio reasoning models must align acoustic signals with the discrete semantic space of large language models, while still preserving fine-grained information needed for reliable inference. Progress is also limited by three major obstacles: the scarcity","authors_text":"Daxin Tan, Dingdong Wang, Guan-Ting Lin, Han Shi, Irwin King, Jiaya Jia, Jing Xiong, Jingyao Li, Qiyong Zheng, Wenqian Cui, Zhihan Guo","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T10:44:56Z","title":"A Survey of Audio Reasoning in Multimodal Foundation Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21008","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d37605841adb16f8ed02053baee4ffa52c5af957d5516e4a7e9a016a73c46a3b","target":"record","created_at":"2026-05-21T01:05:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3d65eb3cf67189952122d7488d7c4f749cde137457b8bc10268847474a5092a2","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-05-20T10:44:56Z","title_canon_sha256":"ae07026eebf6a5905216e8d8f945313520509589cea2ef9404e267e9691e4cf8"},"schema_version":"1.0","source":{"id":"2605.21008","kind":"arxiv","version":1}},"canonical_sha256":"97cb7d26e6195c77b51f538fc97f6d81853f9bf3f3eee05f83e3a2c1ac531f8c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"97cb7d26e6195c77b51f538fc97f6d81853f9bf3f3eee05f83e3a2c1ac531f8c","first_computed_at":"2026-05-21T01:05:32.125091Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:32.125091Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zxMFcS/gyVY/NYdEaPAmuYsm9B9JcT2vn+P4nCh538MyQ6fqhqSvnFHbE0O3W5C9OphinBRhnqOUGfeR8W9XCA==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:32.129114Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.21008","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d37605841adb16f8ed02053baee4ffa52c5af957d5516e4a7e9a016a73c46a3b","sha256:38e22ae4a27b4a65f60fee399f2f0d050be4ba94df9f939ff6a8c9b71a853ae3"],"state_sha256":"7fb03a183923edf503778c4f4c35a7462473ef8c3406b2e819a4e05f39356dc0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5cYoQMDtIQDkzW0ekVKB1d6aCSbTisChnwJK418AWrGjrnJTgaCQGNkrL4pKXLJx6T/1tVaRgEJeobpTqhuWAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T02:38:52.611308Z","bundle_sha256":"6fbcc303363359fd0e31ed8d5ddbafdd67941e7d72dfd07b90174cca73abdc56"}}