{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:NZULMFS5HX4UQINO25CDRQRCUO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a51112143915576fdf494e1aa0105f13aad4e6b1ea16fe77084cb5dba490d2c9","cross_cats_sorted":["cs.AI","cs.CL","cs.MM","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2025-02-23T14:24:15Z","title_canon_sha256":"dd2f381bd5a84a8a601a273a66bab614d3da45514ab0cf29f019c57b181241e6"},"schema_version":"1.0","source":{"id":"2502.16584","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.16584","created_at":"2026-06-09T01:05:05Z"},{"alias_kind":"arxiv_version","alias_value":"2502.16584v2","created_at":"2026-06-09T01:05:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.16584","created_at":"2026-06-09T01:05:05Z"},{"alias_kind":"pith_short_12","alias_value":"NZULMFS5HX4U","created_at":"2026-06-09T01:05:05Z"},{"alias_kind":"pith_short_16","alias_value":"NZULMFS5HX4UQINO","created_at":"2026-06-09T01:05:05Z"},{"alias_kind":"pith_short_8","alias_value":"NZULMFS5","created_at":"2026-06-09T01:05:05Z"}],"graph_snapshots":[{"event_id":"sha256:c989cb464bf51563cf1f7f3c8f23ce6081f903fa3d5e1a6a0ee3adc60a1d1b4f","target":"graph","created_at":"2026-06-09T01:05:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2502.16584/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advancements in audio tokenization have significantly enhanced the integration of audio capabilities into large language models (LLMs). However, audio understanding and generation are often treated as distinct tasks, hindering the development of truly unified audio-language models. While instruction tuning has demonstrated remarkable success in improving generalization and zero-shot learning across text and vision, its application to audio remains largely unexplored. A major obstacle is the lack of comprehensive datasets that unify audio understanding and generation. To address this, we","authors_text":"Chuanbo Zhu, Dongchao Yang, Emmanouil Benetos, Ge Zhang, Haohan Guo, Haohe Liu, Jiahao Pan, Liumeng Xue, Ruibin Yuan, Shuai Fan, Sitong Cheng, Tianchi Liu, Wei Xue, Xingjian Du, Xinsheng Wang, Xinshen Zhang, Yike Guo, Yinghao Ma, Yujia Xiao, Zeyue Tian, Zixuan Li, Zixuan Shen, Ziya Zhou","cross_cats":["cs.AI","cs.CL","cs.MM","eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2025-02-23T14:24:15Z","title":"Audio-FLAN: An Instruction-Following Dataset for Unified Audio Understanding and Generation of Speech, Music, and Sound"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.16584","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:143ac34b4b7c24982e2acaad6c66f0fd4cbc3f9b313645a6b8879c8feb3eb25e","target":"record","created_at":"2026-06-09T01:05:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a51112143915576fdf494e1aa0105f13aad4e6b1ea16fe77084cb5dba490d2c9","cross_cats_sorted":["cs.AI","cs.CL","cs.MM","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2025-02-23T14:24:15Z","title_canon_sha256":"dd2f381bd5a84a8a601a273a66bab614d3da45514ab0cf29f019c57b181241e6"},"schema_version":"1.0","source":{"id":"2502.16584","kind":"arxiv","version":2}},"canonical_sha256":"6e68b6165d3df94821aed74438c222a3aa8195a280ddd48f23b3720b70a94113","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6e68b6165d3df94821aed74438c222a3aa8195a280ddd48f23b3720b70a94113","first_computed_at":"2026-06-09T01:05:05.687494Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:05:05.687494Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8JheIQpUbQvgnEQTW7oO+T4yBL/KPIS/fu+1IVyLxdjHtGzwVqYJmQJYubiDt2BdGYCOM8UO/jR0glw1eovpAQ==","signature_status":"signed_v1","signed_at":"2026-06-09T01:05:05.688074Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.16584","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:143ac34b4b7c24982e2acaad6c66f0fd4cbc3f9b313645a6b8879c8feb3eb25e","sha256:c989cb464bf51563cf1f7f3c8f23ce6081f903fa3d5e1a6a0ee3adc60a1d1b4f"],"state_sha256":"5a04cfcbbea1a326a57be4f386d3e1dc4a61a0fa33b02185351eb304fe47db59"}