{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ELX6Y2U2BD333F24H6BC3VABOS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6c266279a7f23004bd41c2981b37c4793b53077809abdc7fb2882a318958e829","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-17T12:22:17Z","title_canon_sha256":"df99a9853bcc8fb5a0e59412eb37ebe340d10c96648922c8f3f353d2c48cae11"},"schema_version":"1.0","source":{"id":"2605.17414","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17414","created_at":"2026-05-20T00:03:57Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17414v1","created_at":"2026-05-20T00:03:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17414","created_at":"2026-05-20T00:03:57Z"},{"alias_kind":"pith_short_12","alias_value":"ELX6Y2U2BD33","created_at":"2026-05-20T00:03:57Z"},{"alias_kind":"pith_short_16","alias_value":"ELX6Y2U2BD333F24","created_at":"2026-05-20T00:03:57Z"},{"alias_kind":"pith_short_8","alias_value":"ELX6Y2U2","created_at":"2026-05-20T00:03:57Z"}],"graph_snapshots":[{"event_id":"sha256:8e21bac29906e5c36b6508959c2ee4207f3e8eb06c4a32ef520de5b4f6036c94","target":"graph","created_at":"2026-05-20T00:03:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"S2Accompanist achieves state-of-the-art objective performance on the ATTM Grand Challenge benchmark across both the Efficiency and Performance Tracks with only 402M parameters."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The automated data pipeline (structural segmentation, Large Audio-Language Model driven segment-level captioning, and dual-metric quality grading) successfully creates high-quality localized metadata that existing coarse track-level annotations lack."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"S2Accompanist is a 402M-parameter semantic-aware diffusion model that achieves SOTA on the ATTM Grand Challenge benchmark for music accompaniment generation via automated data processing and structure-guided VAE fine-tuning."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A 402-million-parameter diffusion model generates coherent music accompaniments with localized semantic control by creating segment-level metadata and embedding musical structures in its latent space."}],"snapshot_sha256":"21ca9b560c78a63d19bb26a0a38579477df6145fddc44b2d08fc2b6a17ecc046"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"cde2be0819afae1a16ea415e88b9665709a66a569e751480a91519d7ebf8ca4f"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T23:01:19.622641Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T22:52:12.518579Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T21:41:57.743415Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.688434Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.17414/integrity.json","findings":[],"snapshot_sha256":"d262cdb84fbec6f1d8dd66ed6816ed2424f54819fc722c6512f91ae1213376ab","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"High-fidelity text-to-music generation typically relies on massive proprietary datasets and immense computational resources. Existing models often struggle to generate coherent pure musical accompaniments and lack precise, localized semantic control due to their reliance on coarse, track-level annotations. To address these limitations under constrained data and computing resources, we propose S2Accompanist, a Semantic-Aware and Structure-Guided Diffusion Model developed for the ICME2026 ATTM Grand Challenge. Specifically, we design an automated data pipeline comprising structural segmentation,","authors_text":"Chunbo Hao, Guobin Ma, Hanbing Zhang, Huakang Chen, Lei Xie, Mengqi Wei, Pengcheng Zhu, Wenkai Cheng, Yuxuan Xia, Zhixian Zhao","cross_cats":[],"headline":"A 402-million-parameter diffusion model generates coherent music accompaniments with localized semantic control by creating segment-level metadata and embedding musical structures in its latent space.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-17T12:22:17Z","title":"S2Accompanist: A Semantic-Aware and Structure-Guided Diffusion Model for Music Accompaniment Generation"},"references":{"count":26,"internal_anchors":4,"resolved_work":26,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Diffrhythm+: Controllable and flexible full-length song generation with preference optimization,","work_id":"11df16da-c651-445d-b803-a413ffd6cbe1","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Diffrhythm: Blazingly fast and embarrassingly sim- ple end-to-end full-length song generation with latent diffusion","work_id":"d189d936-843d-4f66-b6aa-1e9c623d9065","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Ace-step: A step towards music generation foundation model.arXiv preprint arXiv:2506.00045","work_id":"0d4831b4-71c3-4405-b540-1683d26109f6","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"arXiv preprint arXiv:2602.00744(2026)","work_id":"4d13f3ca-1cdd-4a87-8b3e-6faf7d71bdcc","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Noise2Music: Text-conditioned music generation with diffusion models.arXiv preprint arXiv:2302.03917","work_id":"c46cc447-8a7d-4123-bbdf-96b5571efcb6","year":2023}],"snapshot_sha256":"6f89b8ad51c285401d30414e1e5593d9e0699f13487c04f4ed750e486ed59a63"},"source":{"id":"2605.17414","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T22:46:55.658806Z","id":"5c56e6bc-3652-43c1-9059-f1e476b582bd","model_set":{"reader":"grok-4.3"},"one_line_summary":"S2Accompanist is a 402M-parameter semantic-aware diffusion model that achieves SOTA on the ATTM Grand Challenge benchmark for music accompaniment generation via automated data processing and structure-guided VAE fine-tuning.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A 402-million-parameter diffusion model generates coherent music accompaniments with localized semantic control by creating segment-level metadata and embedding musical structures in its latent space.","strongest_claim":"S2Accompanist achieves state-of-the-art objective performance on the ATTM Grand Challenge benchmark across both the Efficiency and Performance Tracks with only 402M parameters.","weakest_assumption":"The automated data pipeline (structural segmentation, Large Audio-Language Model driven segment-level captioning, and dual-metric quality grading) successfully creates high-quality localized metadata that existing coarse track-level annotations lack."}},"verdict_id":"5c56e6bc-3652-43c1-9059-f1e476b582bd"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d261df47c0225eb45efdba6c84ce8a43b3c4145994a7721707f4cfe34d8f63e9","target":"record","created_at":"2026-05-20T00:03:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6c266279a7f23004bd41c2981b37c4793b53077809abdc7fb2882a318958e829","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-05-17T12:22:17Z","title_canon_sha256":"df99a9853bcc8fb5a0e59412eb37ebe340d10c96648922c8f3f353d2c48cae11"},"schema_version":"1.0","source":{"id":"2605.17414","kind":"arxiv","version":1}},"canonical_sha256":"22efec6a9a08f7bd975c3f822dd40174b3dffb5740c4b4acdc6adb0c3f880101","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"22efec6a9a08f7bd975c3f822dd40174b3dffb5740c4b4acdc6adb0c3f880101","first_computed_at":"2026-05-20T00:03:57.271216Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:57.271216Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"u3PY0YTRj6xl7bH1TTV6Vpd+3ULz9EvbNBscGKE8Mc9+lHkTvEcqj96OiEjsnCiU58HBU2DmfoiZ3wTE56EkAw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:57.272020Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.17414","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d261df47c0225eb45efdba6c84ce8a43b3c4145994a7721707f4cfe34d8f63e9","sha256:8e21bac29906e5c36b6508959c2ee4207f3e8eb06c4a32ef520de5b4f6036c94"],"state_sha256":"c05c0aa5b79c86e60313b743dd5641c95ee5723d81f6c443e623b1a73e3bb106"}