{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:EBPXMTFHOEFDNDEJZS4U47RQPK","short_pith_number":"pith:EBPXMTFH","schema_version":"1.0","canonical_sha256":"205f764ca7710a368c89ccb94e7e307aa5eb53301296fb98246614321020601f","source":{"kind":"arxiv","id":"2604.09173","version":2},"attestation_state":"computed","paper":{"title":"Decoupling Vector Data and Index Storage for Space Efficiency","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"COMPASS decouples vector data from index metadata to compress each separately, cutting storage by up to 58.7% while keeping search and update performance competitive.","cross_cats":["cs.OS"],"primary_cat":"cs.DB","authors_text":"Di Wu, Juncheng Zhang, Patrick P. C. Lee, Rui Yang, Yanjing Ren, Yuanming Ren","submitted_at":"2026-04-10T09:58:17Z","abstract_excerpt":"Managing large-scale vector datasets with disk-resident graph approximate nearest neighbor search (ANNS) systems incurs substantial storage overhead due to the co-location of vector data and auxiliary index metadata, which prevents the storage layer from exploiting their distinct compressibility. We present COMPASS, a component-aware compressed storage framework for disk-resident graph vector search. Leveraging data-index decoupling as a foundation, COMPASS losslessly compresses each component according to its distinct compressibility characteristics, thereby significantly reducing storage spa"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2604.09173","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-04-10T09:58:17Z","cross_cats_sorted":["cs.OS"],"title_canon_sha256":"7a3086fbafc1e369a8721945f04bd80b8c59037f4396ebc982b7b72e62623c01","abstract_canon_sha256":"59be6a55d422d9655fb28b3d83bcda2ad32cd4b00e36e2300851c48bcf5e3d5c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:41.184762Z","signature_b64":"fAx71KjcTZTVLeUJQDlG5U8VcgSoGyiDN4U1CLaDrUR+31jtRN9F/fb8jdHHHlPgfUBLlM3k7SRQY18r9FFwDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"205f764ca7710a368c89ccb94e7e307aa5eb53301296fb98246614321020601f","last_reissued_at":"2026-05-20T00:01:41.183984Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:41.183984Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Decoupling Vector Data and Index Storage for Space Efficiency","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"COMPASS decouples vector data from index metadata to compress each separately, cutting storage by up to 58.7% while keeping search and update performance competitive.","cross_cats":["cs.OS"],"primary_cat":"cs.DB","authors_text":"Di Wu, Juncheng Zhang, Patrick P. C. Lee, Rui Yang, Yanjing Ren, Yuanming Ren","submitted_at":"2026-04-10T09:58:17Z","abstract_excerpt":"Managing large-scale vector datasets with disk-resident graph approximate nearest neighbor search (ANNS) systems incurs substantial storage overhead due to the co-location of vector data and auxiliary index metadata, which prevents the storage layer from exploiting their distinct compressibility. We present COMPASS, a component-aware compressed storage framework for disk-resident graph vector search. Leveraging data-index decoupling as a foundation, COMPASS losslessly compresses each component according to its distinct compressibility characteristics, thereby significantly reducing storage spa"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"COMPASS reduces storage space by up to 58.7%, while delivering improved or competitive search and update performance compared to state-of-the-art disk-resident graph ANNS systems.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The assumption that vector data and auxiliary index metadata possess sufficiently distinct compressibility characteristics that can be exploited independently after decoupling without introducing unacceptable overhead in search or update paths.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"COMPASS decouples vector data and index storage in disk-resident graph ANNS systems to enable component-specific lossless compression, reducing space by up to 58.7% with improved or competitive performance.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"COMPASS decouples vector data from index metadata to compress each separately, cutting storage by up to 58.7% while keeping search and update performance competitive.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"79dd99846a81ef628aef563cec3d483cccd6b754c15f0f123ab3ea1acffc6452"},"source":{"id":"2604.09173","kind":"arxiv","version":2},"verdict":{"id":"89b35d54-671b-480c-aa1f-b13a922d473d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T18:19:24.413198Z","strongest_claim":"COMPASS reduces storage space by up to 58.7%, while delivering improved or competitive search and update performance compared to state-of-the-art disk-resident graph ANNS systems.","one_line_summary":"COMPASS decouples vector data and index storage in disk-resident graph ANNS systems to enable component-specific lossless compression, reducing space by up to 58.7% with improved or competitive performance.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The assumption that vector data and auxiliary index metadata possess sufficiently distinct compressibility characteristics that can be exploited independently after decoupling without introducing unacceptable overhead in search or update paths.","pith_extraction_headline":"COMPASS decouples vector data from index metadata to compress each separately, cutting storage by up to 58.7% while keeping search and update performance competitive."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.09173/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":55,"sample":[{"doi":"","year":2025,"title":"Apache. Cassandra. https://cassandra.apache. org/, 2025","work_id":"1fd7f730-286c-4a7e-9a03-14134d385fcd","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Language models are few-shot learners.Proc","work_id":"86fe48dd-a5c8-4ff4-8046-4939b9720674","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Zhichao Cao, Siying Dong, Sagar Vemuri, and David H. C. Du. Characterizing, modeling, and benchmarking RocksDB key-value workloads at Facebook. InProc. of USENIX FAST, 2020","work_id":"4ef0b7a2-64dd-409c-a5bd-1c165de0d5df","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2006,"title":"Fay Chang, Jeffrey Dean, Sanjay Ghemawat, Wilson C. Hsieh, Deborah A. Wallach, Mike Burrows, Tushar Chandra, Andrew Fikes, and Robert E Gruber. Bigtable: A distributed storage system for structured da","work_id":"cb5d9d16-fa9a-4d40-acb7-63989a7c1523","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Sptag: A li- brary for fast approximate nearest neighbor search","work_id":"7276f1cd-8198-4bf3-9721-e36e9de987dc","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":55,"snapshot_sha256":"19a418bf1e22b3c886f3dd39fe32c37e515e474c590fb64b06a24fae5da0e006","internal_anchors":2},"formal_canon":{"evidence_count":2,"snapshot_sha256":"2f4009285ec88bb5c7f0a4975028941934ac6ad803d0c3cf1574a623c8455e05"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.09173","created_at":"2026-05-20T00:01:41.184109+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.09173v2","created_at":"2026-05-20T00:01:41.184109+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.09173","created_at":"2026-05-20T00:01:41.184109+00:00"},{"alias_kind":"pith_short_12","alias_value":"EBPXMTFHOEFD","created_at":"2026-05-20T00:01:41.184109+00:00"},{"alias_kind":"pith_short_16","alias_value":"EBPXMTFHOEFDNDEJ","created_at":"2026-05-20T00:01:41.184109+00:00"},{"alias_kind":"pith_short_8","alias_value":"EBPXMTFH","created_at":"2026-05-20T00:01:41.184109+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK","json":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK.json","graph_json":"https://pith.science/api/pith-number/EBPXMTFHOEFDNDEJZS4U47RQPK/graph.json","events_json":"https://pith.science/api/pith-number/EBPXMTFHOEFDNDEJZS4U47RQPK/events.json","paper":"https://pith.science/paper/EBPXMTFH"},"agent_actions":{"view_html":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK","download_json":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK.json","view_paper":"https://pith.science/paper/EBPXMTFH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.09173&json=true","fetch_graph":"https://pith.science/api/pith-number/EBPXMTFHOEFDNDEJZS4U47RQPK/graph.json","fetch_events":"https://pith.science/api/pith-number/EBPXMTFHOEFDNDEJZS4U47RQPK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK/action/storage_attestation","attest_author":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK/action/author_attestation","sign_citation":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK/action/citation_signature","submit_replication":"https://pith.science/pith/EBPXMTFHOEFDNDEJZS4U47RQPK/action/replication_record"}},"created_at":"2026-05-20T00:01:41.184109+00:00","updated_at":"2026-05-20T00:01:41.184109+00:00"}