{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:JYYPWID2MWKFIELS225R7LV3SN","short_pith_number":"pith:JYYPWID2","schema_version":"1.0","canonical_sha256":"4e30fb207a6594541172d6bb1faebb937d0611d10cb316dcb2fbd9d5cf22d0f4","source":{"kind":"arxiv","id":"2502.13713","version":5},"attestation_state":"computed","paper":{"title":"TALKPLAY: Multimodal Music Recommendation with Large Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.IR","authors_text":"Juhan Nam, Keunwoo Choi, Seungheon Doh","submitted_at":"2025-02-19T13:28:20Z","abstract_excerpt":"We present TALKPLAY, a novel multimodal music recommendation system that reformulates recommendation as a token generation problem using large language models (LLMs). By leveraging the instruction-following and natural language generation capabilities of LLMs, our system effectively recommends music from diverse user queries while generating contextually relevant responses. While pretrained LLMs are primarily designed for text modality, TALKPLAY extends their scope through two key innovations: a multimodal music tokenizer that encodes audio features, lyrics, metadata, semantic tags, and playli"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2502.13713","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.IR","submitted_at":"2025-02-19T13:28:20Z","cross_cats_sorted":["cs.SD","eess.AS"],"title_canon_sha256":"0ddf3e411228dd802c98f669104577bb5a728e6ae6cd522243991ea4dddab108","abstract_canon_sha256":"af10d4298fdfe9fb503c8170ec6a1cbbf2ae97db8c4accd3460c049a4d5b8465"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:43.566846Z","signature_b64":"tRaFArQvPx0S+afzJxn0s9MxDb+DHy5XVbSaHeqQv/7c1DvGW0L/3VI9l7ITkH5sD2y+wHEsSTR9uEshOPboBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4e30fb207a6594541172d6bb1faebb937d0611d10cb316dcb2fbd9d5cf22d0f4","last_reissued_at":"2026-06-03T01:05:43.566413Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:43.566413Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TALKPLAY: Multimodal Music Recommendation with Large Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.SD","eess.AS"],"primary_cat":"cs.IR","authors_text":"Juhan Nam, Keunwoo Choi, Seungheon Doh","submitted_at":"2025-02-19T13:28:20Z","abstract_excerpt":"We present TALKPLAY, a novel multimodal music recommendation system that reformulates recommendation as a token generation problem using large language models (LLMs). By leveraging the instruction-following and natural language generation capabilities of LLMs, our system effectively recommends music from diverse user queries while generating contextually relevant responses. While pretrained LLMs are primarily designed for text modality, TALKPLAY extends their scope through two key innovations: a multimodal music tokenizer that encodes audio features, lyrics, metadata, semantic tags, and playli"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.13713","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2502.13713/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2502.13713","created_at":"2026-06-03T01:05:43.566469+00:00"},{"alias_kind":"arxiv_version","alias_value":"2502.13713v5","created_at":"2026-06-03T01:05:43.566469+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.13713","created_at":"2026-06-03T01:05:43.566469+00:00"},{"alias_kind":"pith_short_12","alias_value":"JYYPWID2MWKF","created_at":"2026-06-03T01:05:43.566469+00:00"},{"alias_kind":"pith_short_16","alias_value":"JYYPWID2MWKFIELS","created_at":"2026-06-03T01:05:43.566469+00:00"},{"alias_kind":"pith_short_8","alias_value":"JYYPWID2","created_at":"2026-06-03T01:05:43.566469+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2605.09120","citing_title":"Reddit2Deezer: A Scalable Dataset for Real-World Grounded Conversational Music Recommendation","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06331","citing_title":"Expressiveness Limits of Autoregressive Semantic ID Generation in Generative Recommendation","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07895","citing_title":"DialBGM: A Benchmark for Background Music Recommendation from Everyday Multi-Turn Dialogues","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN","json":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN.json","graph_json":"https://pith.science/api/pith-number/JYYPWID2MWKFIELS225R7LV3SN/graph.json","events_json":"https://pith.science/api/pith-number/JYYPWID2MWKFIELS225R7LV3SN/events.json","paper":"https://pith.science/paper/JYYPWID2"},"agent_actions":{"view_html":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN","download_json":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN.json","view_paper":"https://pith.science/paper/JYYPWID2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2502.13713&json=true","fetch_graph":"https://pith.science/api/pith-number/JYYPWID2MWKFIELS225R7LV3SN/graph.json","fetch_events":"https://pith.science/api/pith-number/JYYPWID2MWKFIELS225R7LV3SN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN/action/storage_attestation","attest_author":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN/action/author_attestation","sign_citation":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN/action/citation_signature","submit_replication":"https://pith.science/pith/JYYPWID2MWKFIELS225R7LV3SN/action/replication_record"}},"created_at":"2026-06-03T01:05:43.566469+00:00","updated_at":"2026-06-03T01:05:43.566469+00:00"}