{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FCAPCZUOBLQZO2FYJLUBLJHU4Q","short_pith_number":"pith:FCAPCZUO","schema_version":"1.0","canonical_sha256":"2880f1668e0ae19768b84ae815a4f4e43f3fbdd86b4a971c7b65902a512663af","source":{"kind":"arxiv","id":"2605.15957","version":1},"attestation_state":"computed","paper":{"title":"To GPU or Not to GPU: Vector Search in Relational Engines","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An alternative organization of vector indexes and embeddings lets GPUs accelerate both relational queries and vector search in database engines.","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Bowen Wu, Gustavo Alonso, Joel Andr\\'e, Marko Kabi\\'c, Vasilis Mageirakos, Yannis Chronis","submitted_at":"2026-05-15T13:50:42Z","abstract_excerpt":"Vector search (VS) is now available in most database engines. However, while vector search is a common feature in AI/ML/LLMs where the dominant computing platforms are GPUs, existing database engines operate on CPUs even when implementing vector search. This raises the question of whether integrating vector processing on GPUs as part of the engine would be a better design. In this paper, we explore this question in detail. First, we extend the TPC-H benchmark with vector data (from text and images) and propose a number of representative SQL+VS queries. Second, we develop a modular execution en"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15957","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-05-15T13:50:42Z","cross_cats_sorted":[],"title_canon_sha256":"9497914bf7a25ad372af5becbee311b9eaab2b3fe6428651ca9fb67e73157684","abstract_canon_sha256":"7f76e6644e441bb625f651ed6e57c0f81803465431011d7a5516ddc1b04b2c1c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:46.610764Z","signature_b64":"NOWzgR7RvOR7cq0jZAm505XeyKpb4NX0oUx7o3K3YvH5kmM2PrkLTTUeNsxJcQfHoRoS4XNYqXFRsXyWFwNoCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2880f1668e0ae19768b84ae815a4f4e43f3fbdd86b4a971c7b65902a512663af","last_reissued_at":"2026-05-20T00:01:46.610061Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:46.610061Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"To GPU or Not to GPU: Vector Search in Relational Engines","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An alternative organization of vector indexes and embeddings lets GPUs accelerate both relational queries and vector search in database engines.","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Bowen Wu, Gustavo Alonso, Joel Andr\\'e, Marko Kabi\\'c, Vasilis Mageirakos, Yannis Chronis","submitted_at":"2026-05-15T13:50:42Z","abstract_excerpt":"Vector search (VS) is now available in most database engines. However, while vector search is a common feature in AI/ML/LLMs where the dominant computing platforms are GPUs, existing database engines operate on CPUs even when implementing vector search. This raises the question of whether integrating vector processing on GPUs as part of the engine would be a better design. In this paper, we explore this question in detail. First, we extend the TPC-H benchmark with vector data (from text and images) and propose a number of representative SQL+VS queries. Second, we develop a modular execution en"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"With an alternative organization of vector index and embeddings that reduces index size, both the relational and vector search components are faster on the GPU, particularly on fast interconnects, in contrast with the architecture used in existing engines.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The modular execution engine developed for the experiments accurately models the overheads and integration costs that would appear in a production relational database engine when adding GPU vector search support.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Relational engines achieve faster SQL+vector-search queries on GPU than CPU when using compact vector indexes and fast interconnects, reversing the CPU-only design in current systems.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"An alternative organization of vector indexes and embeddings lets GPUs accelerate both relational queries and vector search in database engines.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d2e499d81e91d6ddec44acd8cfbb0a5be27a3846597b9fbfb962007413ae9a2f"},"source":{"id":"2605.15957","kind":"arxiv","version":1},"verdict":{"id":"659fdc14-02ab-40c0-a053-9fd6e3e1d962","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T19:01:07.638223Z","strongest_claim":"With an alternative organization of vector index and embeddings that reduces index size, both the relational and vector search components are faster on the GPU, particularly on fast interconnects, in contrast with the architecture used in existing engines.","one_line_summary":"Relational engines achieve faster SQL+vector-search queries on GPU than CPU when using compact vector indexes and fast interconnects, reversing the CPU-only design in current systems.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The modular execution engine developed for the experiments accurately models the overheads and integration costs that would appear in a production relational database engine when adding GPU vector search support.","pith_extraction_headline":"An alternative organization of vector indexes and embeddings lets GPUs accelerate both relational queries and vector search in database engines."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15957/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T19:31:19.028370Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T19:12:07.700603Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T17:33:44.877127Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T17:01:55.705790Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"8033c3c1d098fab6e1a7e0b6abcde839e1d18fc16f25aa34b00807d978012a77"},"references":{"count":65,"sample":[{"doi":"","year":2024,"title":"DuckDB Vector Similarity Search (VSS) Extension","work_id":"49338dd3-9a04-4d07-9283-eb2ac08bd4ee","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"Apache Software Foundation. 2026. Apache Arrow: A Cross-Language Devel- opment Platform for In-Memory Data. https://arrow.apache.org/. Accessed: 12 2026-04-29","work_id":"20fd8d9c-4abc-40f4-8ecf-6128338fc86e","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"Felipe Aramburú, William Malpica, Kaouther Abrougui, Amin Aramoon, Ro- mulo Auccapuclla, Claude Brisson, Matthijs Brobbel, Colby Farrell, Pradeep Garigipati, Joost Hoozemans, et al. 2025. Theseus: A D","work_id":"7b76f3c7-1c97-4231-bd2e-a7e164e0a5b9","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1016/j.is.2019.02.006","year":2020,"title":"Martin Aumüller, Erik Bernhardsson, and Alexander Faithfull. 2020. ANN- Benchmarks: A benchmarking tool for approximate nearest neighbor algorithms. Information Systems87 (2020), 101374. https://doi.o","work_id":"d21addbf-2773-4624-9c69-6171bf0c712b","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2016,"title":"David Boehme, Todd Gamblin, David Beckingsale, Peer-Timo Bremer, Alfredo Gimenez, Matthew LeGendre, Olga Pearce, and Martin Schulz. 2016. Caliper: performance introspection for HPC software stacks. In","work_id":"4b048fe8-3b4c-4fac-9054-8482945fb2c1","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":65,"snapshot_sha256":"96abee6aac2b2982a0ab147c4482944fb12bd1fd7f8a2acf25e0a404dbaaf0c2","internal_anchors":3},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9c043aadc5035a4526236809b6f780ec45008d92f5044513bca77b15915df709"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15957","created_at":"2026-05-20T00:01:46.610164+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15957v1","created_at":"2026-05-20T00:01:46.610164+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15957","created_at":"2026-05-20T00:01:46.610164+00:00"},{"alias_kind":"pith_short_12","alias_value":"FCAPCZUOBLQZ","created_at":"2026-05-20T00:01:46.610164+00:00"},{"alias_kind":"pith_short_16","alias_value":"FCAPCZUOBLQZO2FY","created_at":"2026-05-20T00:01:46.610164+00:00"},{"alias_kind":"pith_short_8","alias_value":"FCAPCZUO","created_at":"2026-05-20T00:01:46.610164+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q","json":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q.json","graph_json":"https://pith.science/api/pith-number/FCAPCZUOBLQZO2FYJLUBLJHU4Q/graph.json","events_json":"https://pith.science/api/pith-number/FCAPCZUOBLQZO2FYJLUBLJHU4Q/events.json","paper":"https://pith.science/paper/FCAPCZUO"},"agent_actions":{"view_html":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q","download_json":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q.json","view_paper":"https://pith.science/paper/FCAPCZUO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15957&json=true","fetch_graph":"https://pith.science/api/pith-number/FCAPCZUOBLQZO2FYJLUBLJHU4Q/graph.json","fetch_events":"https://pith.science/api/pith-number/FCAPCZUOBLQZO2FYJLUBLJHU4Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q/action/storage_attestation","attest_author":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q/action/author_attestation","sign_citation":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q/action/citation_signature","submit_replication":"https://pith.science/pith/FCAPCZUOBLQZO2FYJLUBLJHU4Q/action/replication_record"}},"created_at":"2026-05-20T00:01:46.610164+00:00","updated_at":"2026-05-20T00:01:46.610164+00:00"}