{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:34ARD4PWGBFKI67Z7WST6QFWAE","short_pith_number":"pith:34ARD4PW","canonical_record":{"source":{"id":"2605.15532","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T02:04:12Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"205a12a65d62f544b2982b6c8f2121ea2c905435acae6af428c50bf3c8c6dfe0","abstract_canon_sha256":"883bae94ff2fc62e21fb5ee30815f5c9ef232e4908e27f5ccf176f7fa4d411df"},"schema_version":"1.0"},"canonical_sha256":"df0111f1f6304aa47bf9fda53f40b6011f2f9be2ff2b28c78881d1e4945a2ff0","source":{"kind":"arxiv","id":"2605.15532","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15532","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15532v1","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15532","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_12","alias_value":"34ARD4PWGBFK","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_16","alias_value":"34ARD4PWGBFKI67Z","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_8","alias_value":"34ARD4PW","created_at":"2026-05-20T00:01:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:34ARD4PWGBFKI67Z7WST6QFWAE","target":"record","payload":{"canonical_record":{"source":{"id":"2605.15532","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T02:04:12Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"205a12a65d62f544b2982b6c8f2121ea2c905435acae6af428c50bf3c8c6dfe0","abstract_canon_sha256":"883bae94ff2fc62e21fb5ee30815f5c9ef232e4908e27f5ccf176f7fa4d411df"},"schema_version":"1.0"},"canonical_sha256":"df0111f1f6304aa47bf9fda53f40b6011f2f9be2ff2b28c78881d1e4945a2ff0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:03.760420Z","signature_b64":"yk29X1gFcoZYbzYGvH1xIAS0aAY0DQ0FMCDvpwNvxNQDvfDYcQ8GzohSYSvWY7EhZXYiTNkHGm7N9p84vDXyAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"df0111f1f6304aa47bf9fda53f40b6011f2f9be2ff2b28c78881d1e4945a2ff0","last_reissued_at":"2026-05-20T00:01:03.759525Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:03.759525Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.15532","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:01:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hi9hrFgTdjyL6QE3/PPycP+Is7lyAXLiRL/q/QzxafANWwadyzijCCLMaP+CI5Nib0/ScK8EV45hBNfa32CYCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T10:09:42.921770Z"},"content_sha256":"ce19f5b24f711d5acdff0b52e4c1bb35e9461a249c85d992375bcfc97bb98ae1","schema_version":"1.0","event_id":"sha256:ce19f5b24f711d5acdff0b52e4c1bb35e9461a249c85d992375bcfc97bb98ae1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:34ARD4PWGBFKI67Z7WST6QFWAE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"DeltaPrompts: Escaping the Zero-Delta Trap in Multimodal Distillation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Brandon Cui, David Acuna, Hyunwoo Kim, Jaehun Jung, Prithviraj Ammanabrolu, Ximing Lu, Yejin Choi","submitted_at":"2026-05-15T02:04:12Z","abstract_excerpt":"Distillation enables compact Vision-Language Models (VLMs) to obtain strong reasoning capabilities, yet the prompts driving this process are typically chosen via simple heuristics or aggregated from off-the-shelf datasets. We reveal a critical inefficiency in this approach: up to 69% of the prompts in standard chart / document reasoning datasets are effectively zero-delta, meaning the teacher and student already induce the exact same answer distribution. Training on these prompts provides minimal learning signal, causing student improvement to rapidly saturate regardless of data scale. To esca"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"DeltaPrompts drives substantial gains, yielding up to 15% relative improvement even on top of a highly-optimized reasoning model (e.g., Qwen3-VL-8B-Thinking) -- averaged over 10 benchmarks spanning chart, document and perception-centric reasoning.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That actively targeting student failure modes via the staged synthesis pipeline produces prompts whose divergence directly translates to improved learning signal without introducing new biases or distribution shifts that could harm generalization.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"DeltaPrompts generates 200k high-divergence reasoning prompts via staged synthesis to escape zero-delta traps in multimodal distillation, yielding up to 15% relative gains on chart, document, and perception benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"cdc10798ca9f2332747a2b9726431c3211dbb0e4e35a12e95b7feb203cd3f8b0"},"source":{"id":"2605.15532","kind":"arxiv","version":1},"verdict":{"id":"b7af8841-0403-4863-9172-f0db40e777f1","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T14:31:11.687335Z","strongest_claim":"DeltaPrompts drives substantial gains, yielding up to 15% relative improvement even on top of a highly-optimized reasoning model (e.g., Qwen3-VL-8B-Thinking) -- averaged over 10 benchmarks spanning chart, document and perception-centric reasoning.","one_line_summary":"DeltaPrompts generates 200k high-divergence reasoning prompts via staged synthesis to escape zero-delta traps in multimodal distillation, yielding up to 15% relative gains on chart, document, and perception benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That actively targeting student failure modes via the staged synthesis pipeline produces prompts whose divergence directly translates to improved learning signal without introducing new biases or distribution shifts that could harm generalization.","pith_extraction_headline":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15532/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:17.513013Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T14:37:37.984536Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"cited_work_retraction","ran_at":"2026-05-19T14:22:02.006668Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.035861Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"shingle_duplication","ran_at":"2026-05-19T13:49:41.834256Z","status":"skipped","version":"0.1.0","findings_count":0},{"name":"citation_quote_validity","ran_at":"2026-05-19T13:49:41.371700Z","status":"skipped","version":"0.1.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.619583Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"e158b3ccfecf004fb59247836c7e09f1c1e6af097f7b21c4648c63a98b391f7c"},"references":{"count":78,"sample":[{"doi":"","year":2026,"title":"D. Acuna, C.-H. H. Yang, Y . Deng, J. Jung, X. Lu, P. Ammanabrolu, H. Kim, Y .-H. Liao, and Y . Choi. Long grounded thoughts: Synthesizing visual problems and reasoning chains at scale, 2026","work_id":"5296174e-d9cd-4275-a694-5daeb4dafa58","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"R. Agarwal, N. Vieillard, Y . Zhou, P. Stanczyk, S. Ramos, M. Geist, and O. Bachem. On-policy distillation of language models: Learning from self-generated mistakes, 2024","work_id":"57614b41-c052-4302-9168-634308fcf8c8","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"E. Agustsson and R. Timofte. Ntire 2017 challenge on single image super-resolution: Dataset and study. InThe IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, July 2017","work_id":"43202a28-1f30-4f67-b605-af0999bda6eb","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"S. Bai, Y . Cai, R. Chen, K. Chen, X. Chen, Z. Cheng, L. Deng, W. Ding, C. Gao, C. Ge, W. Ge, Z. Guo, Q. Huang, J. Huang, F. Huang, B. Hui, S. Jiang, Z. Li, M. Li, M. Li, K. Li, Z. Lin, J. Lin, X. Liu","work_id":"5c2c9e66-6945-465b-8b2d-7c4887c5f30b","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"E. Borisova, N. Rauscher, and G. Rehm. SciVQA 2025: Overview of the first scientific visual question answering shared task. In T. Ghosal, P. Mayr, A. Singh, A. Naik, G. Rehm, D. Freitag, D. Li, S. Sch","work_id":"593dae8a-fdfa-4b6b-bd0d-10954b2b1343","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":78,"snapshot_sha256":"9b86503861e3f815ebcc2fe235961c0d955309e653cede649c6479dcc2b677e8","internal_anchors":1},"formal_canon":{"evidence_count":2,"snapshot_sha256":"e341732f84f0de88fcd6b40e61b374e38a4a40b9771c1dae6b12c4fe7ae2e4fe"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"b7af8841-0403-4863-9172-f0db40e777f1"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:01:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6vprIJmQJaxgzzG2WucLMUM8WIUzHodcTq/NqF2iJs0FVAvIGjiZj7DxrsOK6Y37fxcFBlDE4T6HE+dZBUXQDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T10:09:42.923054Z"},"content_sha256":"fd6f0ef88d5974bcc57455b54ccd8fa9ffbc2a282a6ea31006938d5cf408b169","schema_version":"1.0","event_id":"sha256:fd6f0ef88d5974bcc57455b54ccd8fa9ffbc2a282a6ea31006938d5cf408b169"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/34ARD4PWGBFKI67Z7WST6QFWAE/bundle.json","state_url":"https://pith.science/pith/34ARD4PWGBFKI67Z7WST6QFWAE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/34ARD4PWGBFKI67Z7WST6QFWAE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T10:09:42Z","links":{"resolver":"https://pith.science/pith/34ARD4PWGBFKI67Z7WST6QFWAE","bundle":"https://pith.science/pith/34ARD4PWGBFKI67Z7WST6QFWAE/bundle.json","state":"https://pith.science/pith/34ARD4PWGBFKI67Z7WST6QFWAE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/34ARD4PWGBFKI67Z7WST6QFWAE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:34ARD4PWGBFKI67Z7WST6QFWAE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"883bae94ff2fc62e21fb5ee30815f5c9ef232e4908e27f5ccf176f7fa4d411df","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T02:04:12Z","title_canon_sha256":"205a12a65d62f544b2982b6c8f2121ea2c905435acae6af428c50bf3c8c6dfe0"},"schema_version":"1.0","source":{"id":"2605.15532","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15532","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15532v1","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15532","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_12","alias_value":"34ARD4PWGBFK","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_16","alias_value":"34ARD4PWGBFKI67Z","created_at":"2026-05-20T00:01:03Z"},{"alias_kind":"pith_short_8","alias_value":"34ARD4PW","created_at":"2026-05-20T00:01:03Z"}],"graph_snapshots":[{"event_id":"sha256:fd6f0ef88d5974bcc57455b54ccd8fa9ffbc2a282a6ea31006938d5cf408b169","target":"graph","created_at":"2026-05-20T00:01:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"DeltaPrompts drives substantial gains, yielding up to 15% relative improvement even on top of a highly-optimized reasoning model (e.g., Qwen3-VL-8B-Thinking) -- averaged over 10 benchmarks spanning chart, document and perception-centric reasoning."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That actively targeting student failure modes via the staged synthesis pipeline produces prompts whose divergence directly translates to improved learning signal without introducing new biases or distribution shifts that could harm generalization."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"DeltaPrompts generates 200k high-divergence reasoning prompts via staged synthesis to escape zero-delta traps in multimodal distillation, yielding up to 15% relative gains on chart, document, and perception benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models."}],"snapshot_sha256":"cdc10798ca9f2332747a2b9726431c3211dbb0e4e35a12e95b7feb203cd3f8b0"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"e341732f84f0de88fcd6b40e61b374e38a4a40b9771c1dae6b12c4fe7ae2e4fe"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:17.513013Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T14:37:37.984536Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"cited_work_retraction","ran_at":"2026-05-19T14:22:02.006668Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.035861Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"shingle_duplication","ran_at":"2026-05-19T13:49:41.834256Z","status":"skipped","version":"0.1.0"},{"findings_count":0,"name":"citation_quote_validity","ran_at":"2026-05-19T13:49:41.371700Z","status":"skipped","version":"0.1.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.619583Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.15532/integrity.json","findings":[],"snapshot_sha256":"e158b3ccfecf004fb59247836c7e09f1c1e6af097f7b21c4648c63a98b391f7c","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Distillation enables compact Vision-Language Models (VLMs) to obtain strong reasoning capabilities, yet the prompts driving this process are typically chosen via simple heuristics or aggregated from off-the-shelf datasets. We reveal a critical inefficiency in this approach: up to 69% of the prompts in standard chart / document reasoning datasets are effectively zero-delta, meaning the teacher and student already induce the exact same answer distribution. Training on these prompts provides minimal learning signal, causing student improvement to rapidly saturate regardless of data scale. To esca","authors_text":"Brandon Cui, David Acuna, Hyunwoo Kim, Jaehun Jung, Prithviraj Ammanabrolu, Ximing Lu, Yejin Choi","cross_cats":["cs.AI","cs.CL"],"headline":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T02:04:12Z","title":"DeltaPrompts: Escaping the Zero-Delta Trap in Multimodal Distillation"},"references":{"count":78,"internal_anchors":1,"resolved_work":78,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"D. Acuna, C.-H. H. Yang, Y . Deng, J. Jung, X. Lu, P. Ammanabrolu, H. Kim, Y .-H. Liao, and Y . Choi. Long grounded thoughts: Synthesizing visual problems and reasoning chains at scale, 2026","work_id":"5296174e-d9cd-4275-a694-5daeb4dafa58","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"R. Agarwal, N. Vieillard, Y . Zhou, P. Stanczyk, S. Ramos, M. Geist, and O. Bachem. On-policy distillation of language models: Learning from self-generated mistakes, 2024","work_id":"57614b41-c052-4302-9168-634308fcf8c8","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"E. Agustsson and R. Timofte. Ntire 2017 challenge on single image super-resolution: Dataset and study. InThe IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, July 2017","work_id":"43202a28-1f30-4f67-b605-af0999bda6eb","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"S. Bai, Y . Cai, R. Chen, K. Chen, X. Chen, Z. Cheng, L. Deng, W. Ding, C. Gao, C. Ge, W. Ge, Z. Guo, Q. Huang, J. Huang, F. Huang, B. Hui, S. Jiang, Z. Li, M. Li, M. Li, K. Li, Z. Lin, J. Lin, X. Liu","work_id":"5c2c9e66-6945-465b-8b2d-7c4887c5f30b","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"E. Borisova, N. Rauscher, and G. Rehm. SciVQA 2025: Overview of the first scientific visual question answering shared task. In T. Ghosal, P. Mayr, A. Singh, A. Naik, G. Rehm, D. Freitag, D. Li, S. Sch","work_id":"593dae8a-fdfa-4b6b-bd0d-10954b2b1343","year":2025}],"snapshot_sha256":"9b86503861e3f815ebcc2fe235961c0d955309e653cede649c6479dcc2b677e8"},"source":{"id":"2605.15532","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T14:31:11.687335Z","id":"b7af8841-0403-4863-9172-f0db40e777f1","model_set":{"reader":"grok-4.3"},"one_line_summary":"DeltaPrompts generates 200k high-divergence reasoning prompts via staged synthesis to escape zero-delta traps in multimodal distillation, yielding up to 15% relative gains on chart, document, and perception benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"High answer divergence between teacher and student makes prompts far more effective for distilling reasoning into smaller vision-language models.","strongest_claim":"DeltaPrompts drives substantial gains, yielding up to 15% relative improvement even on top of a highly-optimized reasoning model (e.g., Qwen3-VL-8B-Thinking) -- averaged over 10 benchmarks spanning chart, document and perception-centric reasoning.","weakest_assumption":"That actively targeting student failure modes via the staged synthesis pipeline produces prompts whose divergence directly translates to improved learning signal without introducing new biases or distribution shifts that could harm generalization."}},"verdict_id":"b7af8841-0403-4863-9172-f0db40e777f1"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ce19f5b24f711d5acdff0b52e4c1bb35e9461a249c85d992375bcfc97bb98ae1","target":"record","created_at":"2026-05-20T00:01:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"883bae94ff2fc62e21fb5ee30815f5c9ef232e4908e27f5ccf176f7fa4d411df","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-15T02:04:12Z","title_canon_sha256":"205a12a65d62f544b2982b6c8f2121ea2c905435acae6af428c50bf3c8c6dfe0"},"schema_version":"1.0","source":{"id":"2605.15532","kind":"arxiv","version":1}},"canonical_sha256":"df0111f1f6304aa47bf9fda53f40b6011f2f9be2ff2b28c78881d1e4945a2ff0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"df0111f1f6304aa47bf9fda53f40b6011f2f9be2ff2b28c78881d1e4945a2ff0","first_computed_at":"2026-05-20T00:01:03.759525Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:01:03.759525Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yk29X1gFcoZYbzYGvH1xIAS0aAY0DQ0FMCDvpwNvxNQDvfDYcQ8GzohSYSvWY7EhZXYiTNkHGm7N9p84vDXyAA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:01:03.760420Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.15532","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ce19f5b24f711d5acdff0b52e4c1bb35e9461a249c85d992375bcfc97bb98ae1","sha256:fd6f0ef88d5974bcc57455b54ccd8fa9ffbc2a282a6ea31006938d5cf408b169"],"state_sha256":"ab26b9ed188df1831d8f82d16dc7b19388ace9e3892d1ff405425a41d6e89a81"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3X9cP0Gyu5vXcoTyxgdA6o31D6yJfxs5Y+7XplpFsvdujC/VH58FbMOZzJXarBYtxMtm0dX3objRkA1rMQ1dAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T10:09:42.929586Z","bundle_sha256":"53ca034a8db21b013feb95813020261f815d9be67648b73514cbd0b53501a2b7"}}