{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:7JCXNMW6V3J23RMAC4GIFTU5PA","short_pith_number":"pith:7JCXNMW6","schema_version":"1.0","canonical_sha256":"fa4576b2deaed3adc580170c82ce9d780dc4346666853af469de370ef88b8509","source":{"kind":"arxiv","id":"2603.19185","version":2},"attestation_state":"computed","paper":{"title":"MIDST Challenge at SaTML 2025: Membership Inference over Diffusion-models-based Synthetic Tabular data","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"The MIDST challenge shows that membership inference attacks can quantify privacy leakage in synthetic tabular data from diffusion models.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Deval Pandya, Gauri Sharma, John Jewell, Mahshid Alinoori, Masoumeh Shafieinejad, Sana Ayromlou, Veronica Chatrath, Wei Pang, Xi He","submitted_at":"2026-03-19T17:42:46Z","abstract_excerpt":"Synthetic data is often perceived as a silver-bullet solution to data anonymization and privacy-preserving data publishing. Drawn from generative models like diffusion models, synthetic data is expected to preserve the statistical properties of the original dataset while remaining resilient to privacy attacks. Recent developments of diffusion models have been effective on a wide range of data types, but their privacy resilience, particularly for tabular formats, remains largely unexplored. MIDST challenge sought a quantitative evaluation of the privacy gain of synthetic tabular data generated "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.19185","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-19T17:42:46Z","cross_cats_sorted":[],"title_canon_sha256":"648fc89bfa38d941e16185ea3bd6d094c500c913227d7e0133265f783e9a44c7","abstract_canon_sha256":"2a59df67fe3a4bdf77f1c1ea31d7364dfdc960dd1e47bb274507d4ff22cd1816"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:30.812287Z","signature_b64":"SPZT4oiY+Sgl7ia7y1/xwE+0obIeqbH27GFEa+dAdXh/+VAwnCYVlvfu1HLzZtJgPl8SqltyC9UScPVFQJVSAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fa4576b2deaed3adc580170c82ce9d780dc4346666853af469de370ef88b8509","last_reissued_at":"2026-05-18T02:44:30.811804Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:30.811804Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MIDST Challenge at SaTML 2025: Membership Inference over Diffusion-models-based Synthetic Tabular data","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"The MIDST challenge shows that membership inference attacks can quantify privacy leakage in synthetic tabular data from diffusion models.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Deval Pandya, Gauri Sharma, John Jewell, Mahshid Alinoori, Masoumeh Shafieinejad, Sana Ayromlou, Veronica Chatrath, Wei Pang, Xi He","submitted_at":"2026-03-19T17:42:46Z","abstract_excerpt":"Synthetic data is often perceived as a silver-bullet solution to data anonymization and privacy-preserving data publishing. Drawn from generative models like diffusion models, synthetic data is expected to preserve the statistical properties of the original dataset while remaining resilient to privacy attacks. Recent developments of diffusion models have been effective on a wide range of data types, but their privacy resilience, particularly for tabular formats, remains largely unexplored. MIDST challenge sought a quantitative evaluation of the privacy gain of synthetic tabular data generated "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"MIDST inspired the development of novel black-box and white-box MIAs tailored to these target diffusion models as a key outcome, enabling a comprehensive evaluation of their privacy efficacy.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the synthetic tabular data generated by diffusion models can be meaningfully targeted by membership inference attacks in ways that quantify real privacy leakage, an assumption the challenge is designed to test but not independently verified in the abstract.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"The MIDST challenge evaluated privacy resilience of diffusion-generated synthetic tabular data via membership inference attacks and produced new black-box and white-box attack methods.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"The MIDST challenge shows that membership inference attacks can quantify privacy leakage in synthetic tabular data from diffusion models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"044e9c92651e31fedc961b66f24a024d2c6540f67e6dc0625050fea2f8474c05"},"source":{"id":"2603.19185","kind":"arxiv","version":2},"verdict":{"id":"243d8fc7-4d7a-4ef8-9d20-450c0abc1c6b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T08:03:56.069730Z","strongest_claim":"MIDST inspired the development of novel black-box and white-box MIAs tailored to these target diffusion models as a key outcome, enabling a comprehensive evaluation of their privacy efficacy.","one_line_summary":"The MIDST challenge evaluated privacy resilience of diffusion-generated synthetic tabular data via membership inference attacks and produced new black-box and white-box attack methods.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the synthetic tabular data generated by diffusion models can be meaningfully targeted by membership inference attacks in ways that quantify real privacy leakage, an assumption the challenge is designed to test but not independently verified in the abstract.","pith_extraction_headline":"The MIDST challenge shows that membership inference attacks can quantify privacy leakage in synthetic tabular data from diffusion models."},"references":{"count":30,"sample":[{"doi":"","year":2020,"title":"S. A. Assefa, D. Dervovic, M. Mahfouz, R. E. Tillman, P. Reddy, and M. Veloso. Generating synthetic data in finance: opportunities, challenges and pitfalls. In Proceedings of the First ACM Internation","work_id":"7c9d518e-fd75-4b1a-8ace-fb73dae1dd23","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2000,"title":"P. Berka et al. Guide to the financial data set.PKDD2000 discovery challenge, 2000","work_id":"f5049c31-5e21-4787-82f7-55094b60d384","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"N. Carlini, J. Hayes, M. Nasr, M. Jagielski, V . Sehwag, F. Tram `er, B. Balle, D. Ippolito, and E. Wallace. Ex- tracting training data from diffusion models. InUSENIX Security 23, pages 5253–5270, 20","work_id":"2a0cfabb-c9eb-4229-a0f1-2c72152111a6","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"J. Duan, F. Kong, S. Wang, X. Shi, and K. Xu. Are diffusion models vulnerable to membership inference attacks? InICML, 2023","work_id":"f46a444f-6109-4529-a8fb-86efbf67232f","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"J. Fonseca and F. Bacao. Tabular and latent space synthetic data generation: a literature review.Journal of Big Data, 10(1):115, 2023","work_id":"6ed28774-71f8-47c1-95a3-7a537fe70cef","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":30,"snapshot_sha256":"60ffa37ee5a083257cb2df5da5779b8b782bab1a002c7230c27ee94687fe4e47","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.19185","created_at":"2026-05-18T02:44:30.811877+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.19185v2","created_at":"2026-05-18T02:44:30.811877+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.19185","created_at":"2026-05-18T02:44:30.811877+00:00"},{"alias_kind":"pith_short_12","alias_value":"7JCXNMW6V3J2","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"7JCXNMW6V3J23RMA","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"7JCXNMW6","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.11527","citing_title":"FERMI: Exploiting Relations for Membership Inference Against Tabular Diffusion Models","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06835","citing_title":"On Privacy Leakage in Tabular Diffusion Models: Influential Factors, Attacker Knowledge, and Metrics","ref_index":48,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA","json":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA.json","graph_json":"https://pith.science/api/pith-number/7JCXNMW6V3J23RMAC4GIFTU5PA/graph.json","events_json":"https://pith.science/api/pith-number/7JCXNMW6V3J23RMAC4GIFTU5PA/events.json","paper":"https://pith.science/paper/7JCXNMW6"},"agent_actions":{"view_html":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA","download_json":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA.json","view_paper":"https://pith.science/paper/7JCXNMW6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.19185&json=true","fetch_graph":"https://pith.science/api/pith-number/7JCXNMW6V3J23RMAC4GIFTU5PA/graph.json","fetch_events":"https://pith.science/api/pith-number/7JCXNMW6V3J23RMAC4GIFTU5PA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA/action/storage_attestation","attest_author":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA/action/author_attestation","sign_citation":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA/action/citation_signature","submit_replication":"https://pith.science/pith/7JCXNMW6V3J23RMAC4GIFTU5PA/action/replication_record"}},"created_at":"2026-05-18T02:44:30.811877+00:00","updated_at":"2026-05-18T02:44:30.811877+00:00"}