{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:DM6PJLIYBTS6D7QLI6TZ4RAPEA","short_pith_number":"pith:DM6PJLIY","schema_version":"1.0","canonical_sha256":"1b3cf4ad180ce5e1fe0b47a79e440f200d8310eb30b0a9e2c72ae722596b10cf","source":{"kind":"arxiv","id":"2605.30365","version":1},"attestation_state":"computed","paper":{"title":"Mental Damage: Caption Poisoning Attacks on Retrieval-Augmented Text-to-Music Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","eess.AS"],"primary_cat":"cs.SD","authors_text":"Hanqing Guo, Long Cheng, Nan Zhang, Shuhao Zhang, Yizhu Wen","submitted_at":"2026-05-18T02:11:57Z","abstract_excerpt":"Retrieval-augmented text-to-music (TTM) systems augment underspecified user prompts using captions retrieved from a music caption dataset. This design introduces an integrity dependency on the music knowledge database. We show that an attacker can poison the database by injecting a small number of crafted music captions, causing the system to retrieve malicious captions that bias prompt augmentation and steer generation away from the user's intended function, without modifying the user prompt, retriever, or generator. To achieve the music caption poisoning attack, we propose a dual-layer capti"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.30365","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-05-18T02:11:57Z","cross_cats_sorted":["cs.AI","eess.AS"],"title_canon_sha256":"5d5de76db9dafd006b8ab0d84918dc1cb44ae3624312e5750294b2b1dce02187","abstract_canon_sha256":"b02bbda54adc7cb33fc33fe305a430b3784a257ff536b5adf76a7d3d0c7d1b21"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T00:02:06.961638Z","signature_b64":"TctqJtaO4uvn3bFzHRIQ1zG8sp0g4BpSxj/du/9xcQPJLMHSiQyW4cayh4V1ekfqBPawLog3HuVvsl32gG4iDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1b3cf4ad180ce5e1fe0b47a79e440f200d8310eb30b0a9e2c72ae722596b10cf","last_reissued_at":"2026-06-01T00:02:06.960680Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T00:02:06.960680Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mental Damage: Caption Poisoning Attacks on Retrieval-Augmented Text-to-Music Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","eess.AS"],"primary_cat":"cs.SD","authors_text":"Hanqing Guo, Long Cheng, Nan Zhang, Shuhao Zhang, Yizhu Wen","submitted_at":"2026-05-18T02:11:57Z","abstract_excerpt":"Retrieval-augmented text-to-music (TTM) systems augment underspecified user prompts using captions retrieved from a music caption dataset. This design introduces an integrity dependency on the music knowledge database. We show that an attacker can poison the database by injecting a small number of crafted music captions, causing the system to retrieve malicious captions that bias prompt augmentation and steer generation away from the user's intended function, without modifying the user prompt, retriever, or generator. To achieve the music caption poisoning attack, we propose a dual-layer capti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30365","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30365/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.30365","created_at":"2026-06-01T00:02:06.960801+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.30365v1","created_at":"2026-06-01T00:02:06.960801+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30365","created_at":"2026-06-01T00:02:06.960801+00:00"},{"alias_kind":"pith_short_12","alias_value":"DM6PJLIYBTS6","created_at":"2026-06-01T00:02:06.960801+00:00"},{"alias_kind":"pith_short_16","alias_value":"DM6PJLIYBTS6D7QL","created_at":"2026-06-01T00:02:06.960801+00:00"},{"alias_kind":"pith_short_8","alias_value":"DM6PJLIY","created_at":"2026-06-01T00:02:06.960801+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA","json":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA.json","graph_json":"https://pith.science/api/pith-number/DM6PJLIYBTS6D7QLI6TZ4RAPEA/graph.json","events_json":"https://pith.science/api/pith-number/DM6PJLIYBTS6D7QLI6TZ4RAPEA/events.json","paper":"https://pith.science/paper/DM6PJLIY"},"agent_actions":{"view_html":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA","download_json":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA.json","view_paper":"https://pith.science/paper/DM6PJLIY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.30365&json=true","fetch_graph":"https://pith.science/api/pith-number/DM6PJLIYBTS6D7QLI6TZ4RAPEA/graph.json","fetch_events":"https://pith.science/api/pith-number/DM6PJLIYBTS6D7QLI6TZ4RAPEA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA/action/storage_attestation","attest_author":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA/action/author_attestation","sign_citation":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA/action/citation_signature","submit_replication":"https://pith.science/pith/DM6PJLIYBTS6D7QLI6TZ4RAPEA/action/replication_record"}},"created_at":"2026-06-01T00:02:06.960801+00:00","updated_at":"2026-06-01T00:02:06.960801+00:00"}