{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4WFXYNRAL4DLDXY2VNADR2PWBU","short_pith_number":"pith:4WFXYNRA","canonical_record":{"source":{"id":"2605.12879","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T01:48:46Z","cross_cats_sorted":[],"title_canon_sha256":"315ca8f9c2b2f4186685a68e65f5f38ec2da07886d9eddee96841a5fcaf41f5c","abstract_canon_sha256":"f66a07ec8a708252cb6f438cd1b697700afadec4abd2233d39d9774c1174e4f6"},"schema_version":"1.0"},"canonical_sha256":"e58b7c36205f06b1df1aab4038e9f60d19d759707b19e5ec549b83e2b5c2bdc1","source":{"kind":"arxiv","id":"2605.12879","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12879","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12879v1","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12879","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"pith_short_12","alias_value":"4WFXYNRAL4DL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4WFXYNRAL4DLDXY2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4WFXYNRA","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4WFXYNRAL4DLDXY2VNADR2PWBU","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12879","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T01:48:46Z","cross_cats_sorted":[],"title_canon_sha256":"315ca8f9c2b2f4186685a68e65f5f38ec2da07886d9eddee96841a5fcaf41f5c","abstract_canon_sha256":"f66a07ec8a708252cb6f438cd1b697700afadec4abd2233d39d9774c1174e4f6"},"schema_version":"1.0"},"canonical_sha256":"e58b7c36205f06b1df1aab4038e9f60d19d759707b19e5ec549b83e2b5c2bdc1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:11.127720Z","signature_b64":"oRXG0loqo1sREg9Q+10VklbZjZpG/ReoEkcsjbYloUSAVuMO0FSXAYgIVmvByQl4nz7K0hTpEYyWJFyyDnHNBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e58b7c36205f06b1df1aab4038e9f60d19d759707b19e5ec549b83e2b5c2bdc1","last_reissued_at":"2026-05-18T03:09:11.126940Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:11.126940Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12879","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QYVx6hws0XbJ7vcpl37QtMrdIl1AdkjANuTNHXqwkVqxLjUdQCDEfCQXI9ByqRxKSLaJeDbHFFiZq1S9DJ85Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T02:47:08.763948Z"},"content_sha256":"b4ec3bd267a6e3d9ea3e85e411fe710a5aa3f230b191adb5cf082e1acf7043aa","schema_version":"1.0","event_id":"sha256:b4ec3bd267a6e3d9ea3e85e411fe710a5aa3f230b191adb5cf082e1acf7043aa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4WFXYNRAL4DLDXY2VNADR2PWBU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ASAP: Amortized Doubly-Stochastic Attention via Sliced Dual Projection","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"David Hyde, Huy Tran, Max Milkert","submitted_at":"2026-05-13T01:48:46Z","abstract_excerpt":"Doubly-stochastic attention has emerged as a transport-based alternative to row-softmax attention, with recent Transformer variants using it to reduce attention sinks and rank collapse while improving performance. In this family, the standard approach is Sinkhorn scaling, which trains more efficiently but still repeats matrix scaling in every inference forward pass. Sliced-transport attention removes the online iteration, but its soft sorting approximation materializes dense tensors for each slice, requiring substantially more training resources than Sinkhorn attention. We introduce ASAP: Amor"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"In the main frozen-layer benchmark, ASAP is 5.3 faster than the trained Sinkhorn teacher while matching its accuracy; in downstream replacements, ASAP recovers most of the teacher performance without any retraining.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The lightweight parametric map learned from exact one-dimensional Kantorovich potentials to the Sinkhorn query-side dual generalizes accurately enough at inference to preserve the doubly-stochastic properties and downstream performance for inputs outside the training distribution.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"ASAP amortizes Sinkhorn-based doubly-stochastic attention by learning a parametric map from 1D potentials to the Sinkhorn dual and reconstructing the plan via two-sided entropic c-transform, delivering 5.3x faster inference at matched accuracy.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"78688452a597fde30f50306790bf8f9266c05c196c1b8d0869468112a029e09c"},"source":{"id":"2605.12879","kind":"arxiv","version":1},"verdict":{"id":"fb793dc8-846a-45c2-b600-31f63e24f58c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:14:41.926591Z","strongest_claim":"In the main frozen-layer benchmark, ASAP is 5.3 faster than the trained Sinkhorn teacher while matching its accuracy; in downstream replacements, ASAP recovers most of the teacher performance without any retraining.","one_line_summary":"ASAP amortizes Sinkhorn-based doubly-stochastic attention by learning a parametric map from 1D potentials to the Sinkhorn dual and reconstructing the plan via two-sided entropic c-transform, delivering 5.3x faster inference at matched accuracy.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The lightweight parametric map learned from exact one-dimensional Kantorovich potentials to the Sinkhorn query-side dual generalizes accurately enough at inference to preserve the doubly-stochastic properties and downstream performance for inputs outside the training distribution.","pith_extraction_headline":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference."},"references":{"count":49,"sample":[{"doi":"","year":null,"title":"Scaling Learning Algorithms Towards","work_id":"bb2761cc-98d0-411b-92f6-803773d64460","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"and Osindero, Simon and Teh, Yee Whye , journal =","work_id":"0a5921e3-ac4e-46f1-85ae-866119a87be0","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2016,"title":"Deep learning , author=. 2016 , publisher=","work_id":"cf0899e0-53ee-4591-aae4-f38fa5ac12ad","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Proceedings of The 25th International Conference on Artificial Intelligence and Statistics , pages=","work_id":"7e2ad380-4621-4ad7-ad02-93bca0234f52","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"International Conference on Learning Representations , year=","work_id":"c1a5d24b-513c-43b6-a8b5-79a2a5895997","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":49,"snapshot_sha256":"a635f6dee9b236074cd48990ee82a7528d4e1595759ef2f17d7b8d2236c3dbe9","internal_anchors":4},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b8c51d818f0f6eb1598ee0cd3d831ad84da6a8c00beb2d9b9f28cc0d9fbacf96"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"fb793dc8-846a-45c2-b600-31f63e24f58c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/pwVwGwn5lc/buUdlVhz3Kn5eCT1jMr4M5a63F/36tjNuCx3JB/E86iQlhe1XOx/6TbObulHvaUOuFTBjJwtDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T02:47:08.764678Z"},"content_sha256":"9d06e760d1b4d572b1fa0c1a44dd4bde9f4927acf094fb44577359fb4a499868","schema_version":"1.0","event_id":"sha256:9d06e760d1b4d572b1fa0c1a44dd4bde9f4927acf094fb44577359fb4a499868"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/bundle.json","state_url":"https://pith.science/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T02:47:08Z","links":{"resolver":"https://pith.science/pith/4WFXYNRAL4DLDXY2VNADR2PWBU","bundle":"https://pith.science/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/bundle.json","state":"https://pith.science/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4WFXYNRAL4DLDXY2VNADR2PWBU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4WFXYNRAL4DLDXY2VNADR2PWBU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f66a07ec8a708252cb6f438cd1b697700afadec4abd2233d39d9774c1174e4f6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T01:48:46Z","title_canon_sha256":"315ca8f9c2b2f4186685a68e65f5f38ec2da07886d9eddee96841a5fcaf41f5c"},"schema_version":"1.0","source":{"id":"2605.12879","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12879","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12879v1","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12879","created_at":"2026-05-18T03:09:11Z"},{"alias_kind":"pith_short_12","alias_value":"4WFXYNRAL4DL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4WFXYNRAL4DLDXY2","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4WFXYNRA","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:9d06e760d1b4d572b1fa0c1a44dd4bde9f4927acf094fb44577359fb4a499868","target":"graph","created_at":"2026-05-18T03:09:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"In the main frozen-layer benchmark, ASAP is 5.3 faster than the trained Sinkhorn teacher while matching its accuracy; in downstream replacements, ASAP recovers most of the teacher performance without any retraining."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The lightweight parametric map learned from exact one-dimensional Kantorovich potentials to the Sinkhorn query-side dual generalizes accurately enough at inference to preserve the doubly-stochastic properties and downstream performance for inputs outside the training distribution."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"ASAP amortizes Sinkhorn-based doubly-stochastic attention by learning a parametric map from 1D potentials to the Sinkhorn dual and reconstructing the plan via two-sided entropic c-transform, delivering 5.3x faster inference at matched accuracy."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference."}],"snapshot_sha256":"78688452a597fde30f50306790bf8f9266c05c196c1b8d0869468112a029e09c"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b8c51d818f0f6eb1598ee0cd3d831ad84da6a8c00beb2d9b9f28cc0d9fbacf96"},"paper":{"abstract_excerpt":"Doubly-stochastic attention has emerged as a transport-based alternative to row-softmax attention, with recent Transformer variants using it to reduce attention sinks and rank collapse while improving performance. In this family, the standard approach is Sinkhorn scaling, which trains more efficiently but still repeats matrix scaling in every inference forward pass. Sliced-transport attention removes the online iteration, but its soft sorting approximation materializes dense tensors for each slice, requiring substantially more training resources than Sinkhorn attention. We introduce ASAP: Amor","authors_text":"David Hyde, Huy Tran, Max Milkert","cross_cats":[],"headline":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T01:48:46Z","title":"ASAP: Amortized Doubly-Stochastic Attention via Sliced Dual Projection"},"references":{"count":49,"internal_anchors":4,"resolved_work":49,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Scaling Learning Algorithms Towards","work_id":"bb2761cc-98d0-411b-92f6-803773d64460","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"and Osindero, Simon and Teh, Yee Whye , journal =","work_id":"0a5921e3-ac4e-46f1-85ae-866119a87be0","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Deep learning , author=. 2016 , publisher=","work_id":"cf0899e0-53ee-4591-aae4-f38fa5ac12ad","year":2016},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Proceedings of The 25th International Conference on Artificial Intelligence and Statistics , pages=","work_id":"7e2ad380-4621-4ad7-ad02-93bca0234f52","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"International Conference on Learning Representations , year=","work_id":"c1a5d24b-513c-43b6-a8b5-79a2a5895997","year":null}],"snapshot_sha256":"a635f6dee9b236074cd48990ee82a7528d4e1595759ef2f17d7b8d2236c3dbe9"},"source":{"id":"2605.12879","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:14:41.926591Z","id":"fb793dc8-846a-45c2-b600-31f63e24f58c","model_set":{"reader":"grok-4.3"},"one_line_summary":"ASAP amortizes Sinkhorn-based doubly-stochastic attention by learning a parametric map from 1D potentials to the Sinkhorn dual and reconstructing the plan via two-sided entropic c-transform, delivering 5.3x faster inference at matched accuracy.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"ASAP replaces iterative Sinkhorn scaling in doubly-stochastic attention with a learned fixed sliced dual projection for faster inference.","strongest_claim":"In the main frozen-layer benchmark, ASAP is 5.3 faster than the trained Sinkhorn teacher while matching its accuracy; in downstream replacements, ASAP recovers most of the teacher performance without any retraining.","weakest_assumption":"The lightweight parametric map learned from exact one-dimensional Kantorovich potentials to the Sinkhorn query-side dual generalizes accurately enough at inference to preserve the doubly-stochastic properties and downstream performance for inputs outside the training distribution."}},"verdict_id":"fb793dc8-846a-45c2-b600-31f63e24f58c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b4ec3bd267a6e3d9ea3e85e411fe710a5aa3f230b191adb5cf082e1acf7043aa","target":"record","created_at":"2026-05-18T03:09:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f66a07ec8a708252cb6f438cd1b697700afadec4abd2233d39d9774c1174e4f6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T01:48:46Z","title_canon_sha256":"315ca8f9c2b2f4186685a68e65f5f38ec2da07886d9eddee96841a5fcaf41f5c"},"schema_version":"1.0","source":{"id":"2605.12879","kind":"arxiv","version":1}},"canonical_sha256":"e58b7c36205f06b1df1aab4038e9f60d19d759707b19e5ec549b83e2b5c2bdc1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e58b7c36205f06b1df1aab4038e9f60d19d759707b19e5ec549b83e2b5c2bdc1","first_computed_at":"2026-05-18T03:09:11.126940Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:11.126940Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"oRXG0loqo1sREg9Q+10VklbZjZpG/ReoEkcsjbYloUSAVuMO0FSXAYgIVmvByQl4nz7K0hTpEYyWJFyyDnHNBw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:11.127720Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12879","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b4ec3bd267a6e3d9ea3e85e411fe710a5aa3f230b191adb5cf082e1acf7043aa","sha256:9d06e760d1b4d572b1fa0c1a44dd4bde9f4927acf094fb44577359fb4a499868"],"state_sha256":"7fe8ca3fdcb0ac4448be1d6192041904db273336c5d61cab48246895888a6e7b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ovdhf8imPvTvhOKZYVYzVfsvPdgNho+gYr0Nmxj+S2T3fj4nbvSC2nHS8RIQKvCw2tIrOszLwXOQfX9COqCiDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T02:47:08.768275Z","bundle_sha256":"af525a8e7cb7aacc5141ace50d9ce1db8561a7036eb498ffa622a087ef093b52"}}