{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MZOWVIFNXCBANOVFMMGS72NPIQ","short_pith_number":"pith:MZOWVIFN","schema_version":"1.0","canonical_sha256":"665d6aa0adb88206baa5630d2fe9af4428c07afb68794f98f68661cc365f3292","source":{"kind":"arxiv","id":"2606.28387","version":1},"attestation_state":"computed","paper":{"title":"Schema-First Retrieval: Embedding Catalogs for Natural Language Analytics","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.IR","authors_text":"Adarsh Agrawal, Shashank Indukuri","submitted_at":"2026-06-23T02:20:36Z","abstract_excerpt":"Enterprise text-to-SQL systems often fail before SQL is generated: the model receives the wrong schema context. Modern warehouses contain thousands of tables, abbreviated columns, informal metrics, hidden join conventions, and permission boundaries that are not captured by raw table names. We introduce Schema-First Retrieval, a retrieval layer that embeds catalog metadata rather than warehouse rows. The system indexes five typed catalog objects, tables, columns, metrics, relationships, and query history, using object-specific text templates. At query time, it combines parallel vector search, l"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.28387","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-06-23T02:20:36Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ec39f0235e87812652ae57c8080389659077eaa89ed96bd371e00e7028f1a950","abstract_canon_sha256":"cff5f9328307386b9bb4bd6d719b65ebd7d711989e79463a20f7ad871b83e2f7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T00:15:12.668876Z","signature_b64":"LmFaFQc46m373OlrDeRuFeKbZz0XINrvTpBobw5besy8PwpmzOsMlGKWmb0u+2Tmni6DH/XRcVHFLnP1CDjJDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"665d6aa0adb88206baa5630d2fe9af4428c07afb68794f98f68661cc365f3292","last_reissued_at":"2026-06-30T00:15:12.668416Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T00:15:12.668416Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Schema-First Retrieval: Embedding Catalogs for Natural Language Analytics","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.IR","authors_text":"Adarsh Agrawal, Shashank Indukuri","submitted_at":"2026-06-23T02:20:36Z","abstract_excerpt":"Enterprise text-to-SQL systems often fail before SQL is generated: the model receives the wrong schema context. Modern warehouses contain thousands of tables, abbreviated columns, informal metrics, hidden join conventions, and permission boundaries that are not captured by raw table names. We introduce Schema-First Retrieval, a retrieval layer that embeds catalog metadata rather than warehouse rows. The system indexes five typed catalog objects, tables, columns, metrics, relationships, and query history, using object-specific text templates. At query time, it combines parallel vector search, l"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28387","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28387/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.28387","created_at":"2026-06-30T00:15:12.668476+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.28387v1","created_at":"2026-06-30T00:15:12.668476+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28387","created_at":"2026-06-30T00:15:12.668476+00:00"},{"alias_kind":"pith_short_12","alias_value":"MZOWVIFNXCBA","created_at":"2026-06-30T00:15:12.668476+00:00"},{"alias_kind":"pith_short_16","alias_value":"MZOWVIFNXCBANOVF","created_at":"2026-06-30T00:15:12.668476+00:00"},{"alias_kind":"pith_short_8","alias_value":"MZOWVIFN","created_at":"2026-06-30T00:15:12.668476+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ","json":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ.json","graph_json":"https://pith.science/api/pith-number/MZOWVIFNXCBANOVFMMGS72NPIQ/graph.json","events_json":"https://pith.science/api/pith-number/MZOWVIFNXCBANOVFMMGS72NPIQ/events.json","paper":"https://pith.science/paper/MZOWVIFN"},"agent_actions":{"view_html":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ","download_json":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ.json","view_paper":"https://pith.science/paper/MZOWVIFN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.28387&json=true","fetch_graph":"https://pith.science/api/pith-number/MZOWVIFNXCBANOVFMMGS72NPIQ/graph.json","fetch_events":"https://pith.science/api/pith-number/MZOWVIFNXCBANOVFMMGS72NPIQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ/action/storage_attestation","attest_author":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ/action/author_attestation","sign_citation":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ/action/citation_signature","submit_replication":"https://pith.science/pith/MZOWVIFNXCBANOVFMMGS72NPIQ/action/replication_record"}},"created_at":"2026-06-30T00:15:12.668476+00:00","updated_at":"2026-06-30T00:15:12.668476+00:00"}