{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PXTAGQFVTJREVOXA76QIXZFL7D","short_pith_number":"pith:PXTAGQFV","canonical_record":{"source":{"id":"2605.29384","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-28T05:36:37Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"8f826f9fab4c347129f2e468e28851290ac6bc8dc4a3e415efa461b78ee96269","abstract_canon_sha256":"2ba214ccbf20d93462e073576dcb62a8655510ab9e09a36180f1d6e57d48ccb0"},"schema_version":"1.0"},"canonical_sha256":"7de60340b59a624abae0ffa08be4abf8db8eb249d44960916a6a2fc4e5a542ac","source":{"kind":"arxiv","id":"2605.29384","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.29384","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"arxiv_version","alias_value":"2605.29384v1","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29384","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_12","alias_value":"PXTAGQFVTJRE","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_16","alias_value":"PXTAGQFVTJREVOXA","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_8","alias_value":"PXTAGQFV","created_at":"2026-05-29T01:05:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PXTAGQFVTJREVOXA76QIXZFL7D","target":"record","payload":{"canonical_record":{"source":{"id":"2605.29384","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-28T05:36:37Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"8f826f9fab4c347129f2e468e28851290ac6bc8dc4a3e415efa461b78ee96269","abstract_canon_sha256":"2ba214ccbf20d93462e073576dcb62a8655510ab9e09a36180f1d6e57d48ccb0"},"schema_version":"1.0"},"canonical_sha256":"7de60340b59a624abae0ffa08be4abf8db8eb249d44960916a6a2fc4e5a542ac","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:36.346729Z","signature_b64":"tC1nM5ForypdOlQRFL0VhgtWJazUiIY+V3k9cwU96X5o3dw+eZUG/Trqtp77oeOPv4eA7BidQgbOSaLXbS5vAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7de60340b59a624abae0ffa08be4abf8db8eb249d44960916a6a2fc4e5a542ac","last_reissued_at":"2026-05-29T01:05:36.346209Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:36.346209Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.29384","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PDW46ARpP9mkQJqpZLKe7f4i1cXI56dCP1yWW7kiDRr3EIuG0IMISpi48wPJs6DTgBLhYpvySker6jPs5Eo+DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T07:27:37.497742Z"},"content_sha256":"1c00f19daed2001c2d42ab998f045efb0e6bf169762a791b5cef8fef0be6ee72","schema_version":"1.0","event_id":"sha256:1c00f19daed2001c2d42ab998f045efb0e6bf169762a791b5cef8fef0be6ee72"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PXTAGQFVTJREVOXA76QIXZFL7D","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Latent Terms: Dense Retrievers Contain Trivially Extractable BM25-ready Zipfian Vocabularies","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.IR","authors_text":"Aamir Shakir, Benjamin Clavi\\'e, Makoto P. Kato, Sean Lee","submitted_at":"2026-05-28T05:36:37Z","abstract_excerpt":"We propose Latent Terms, a method revealing that models trained for dense retrieval, whether single- or multi-vector, learn representations that can trivially be decomposed into retrieval-ready sparse features. When trained on frozen retrievers, Sparse Autoencoders without any retrieval-specific adjustments extract a latent vocabulary with approximately Zipfian collection statistics, directly suitable for classical sparse retrieval scoring via BM25. This approach enables sparse retrieval while requiring no learned expansion objective or sparse retrieval supervision whatsoever, and can be readi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29384","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.29384/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:05:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1BMaa4YXoS1gZja8Fqad156152PF/2VxROUVvax0Rg/9jI6jLTpI+GrdCCfQfXNXIDLYsJP27SZl90Mr3afTDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T07:27:37.498139Z"},"content_sha256":"cef9e0f498cf3d81238c078d05b23a6c1c38a5b8451c1bd10da5bc3152d10d33","schema_version":"1.0","event_id":"sha256:cef9e0f498cf3d81238c078d05b23a6c1c38a5b8451c1bd10da5bc3152d10d33"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PXTAGQFVTJREVOXA76QIXZFL7D/bundle.json","state_url":"https://pith.science/pith/PXTAGQFVTJREVOXA76QIXZFL7D/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PXTAGQFVTJREVOXA76QIXZFL7D/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T07:27:37Z","links":{"resolver":"https://pith.science/pith/PXTAGQFVTJREVOXA76QIXZFL7D","bundle":"https://pith.science/pith/PXTAGQFVTJREVOXA76QIXZFL7D/bundle.json","state":"https://pith.science/pith/PXTAGQFVTJREVOXA76QIXZFL7D/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PXTAGQFVTJREVOXA76QIXZFL7D/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PXTAGQFVTJREVOXA76QIXZFL7D","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2ba214ccbf20d93462e073576dcb62a8655510ab9e09a36180f1d6e57d48ccb0","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-28T05:36:37Z","title_canon_sha256":"8f826f9fab4c347129f2e468e28851290ac6bc8dc4a3e415efa461b78ee96269"},"schema_version":"1.0","source":{"id":"2605.29384","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.29384","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"arxiv_version","alias_value":"2605.29384v1","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29384","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_12","alias_value":"PXTAGQFVTJRE","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_16","alias_value":"PXTAGQFVTJREVOXA","created_at":"2026-05-29T01:05:36Z"},{"alias_kind":"pith_short_8","alias_value":"PXTAGQFV","created_at":"2026-05-29T01:05:36Z"}],"graph_snapshots":[{"event_id":"sha256:cef9e0f498cf3d81238c078d05b23a6c1c38a5b8451c1bd10da5bc3152d10d33","target":"graph","created_at":"2026-05-29T01:05:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.29384/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We propose Latent Terms, a method revealing that models trained for dense retrieval, whether single- or multi-vector, learn representations that can trivially be decomposed into retrieval-ready sparse features. When trained on frozen retrievers, Sparse Autoencoders without any retrieval-specific adjustments extract a latent vocabulary with approximately Zipfian collection statistics, directly suitable for classical sparse retrieval scoring via BM25. This approach enables sparse retrieval while requiring no learned expansion objective or sparse retrieval supervision whatsoever, and can be readi","authors_text":"Aamir Shakir, Benjamin Clavi\\'e, Makoto P. Kato, Sean Lee","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-28T05:36:37Z","title":"Latent Terms: Dense Retrievers Contain Trivially Extractable BM25-ready Zipfian Vocabularies"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29384","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1c00f19daed2001c2d42ab998f045efb0e6bf169762a791b5cef8fef0be6ee72","target":"record","created_at":"2026-05-29T01:05:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2ba214ccbf20d93462e073576dcb62a8655510ab9e09a36180f1d6e57d48ccb0","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-05-28T05:36:37Z","title_canon_sha256":"8f826f9fab4c347129f2e468e28851290ac6bc8dc4a3e415efa461b78ee96269"},"schema_version":"1.0","source":{"id":"2605.29384","kind":"arxiv","version":1}},"canonical_sha256":"7de60340b59a624abae0ffa08be4abf8db8eb249d44960916a6a2fc4e5a542ac","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7de60340b59a624abae0ffa08be4abf8db8eb249d44960916a6a2fc4e5a542ac","first_computed_at":"2026-05-29T01:05:36.346209Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:05:36.346209Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tC1nM5ForypdOlQRFL0VhgtWJazUiIY+V3k9cwU96X5o3dw+eZUG/Trqtp77oeOPv4eA7BidQgbOSaLXbS5vAg==","signature_status":"signed_v1","signed_at":"2026-05-29T01:05:36.346729Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.29384","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1c00f19daed2001c2d42ab998f045efb0e6bf169762a791b5cef8fef0be6ee72","sha256:cef9e0f498cf3d81238c078d05b23a6c1c38a5b8451c1bd10da5bc3152d10d33"],"state_sha256":"4131ef1a6b25c549ceefe0f605864d2c2f6e5fc0182fbcbd18140f00c9b01a29"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sHcYGtDoVOM2vr7kfPYsFm3R3hJiIjI/lC5NgVHcH9QnF0H8pTIC6tn60CpnOPeKMQFfz067EN7pfRjJ7cl5CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T07:27:37.500198Z","bundle_sha256":"c8b31d5b4c2b1def451b70b3b2a27bf8db266d4045d429278e4478447db4e877"}}