{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:VHTOSQO4Q7SJSHXFQXUHCYYMC3","short_pith_number":"pith:VHTOSQO4","schema_version":"1.0","canonical_sha256":"a9e6e941dc87e4991ee585e871630c16e87c60ec750997a4e8724fd72505c993","source":{"kind":"arxiv","id":"2602.10908","version":2},"attestation_state":"computed","paper":{"title":"SoftMatcha 2: A Fast and Soft Pattern Matcher for Trillion-Scale Corpora","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Go kamoda, Kohei Suenaga, Masaki Waga, Masataka Yoneda, Sho Yokoi, Takuya Akiba, Yusuke Matsushita","submitted_at":"2026-02-11T14:40:15Z","abstract_excerpt":"We present SoftMatcha 2, an ultra-fast and flexible search algorithm that enables search over trillion-scale natural language corpora in under 0.3 seconds while allowing semantic variations in the form of substitution, insertion, and deletion. Our approach employs string matching based on suffix arrays that scales well with corpus size, and represents words as vectors, which underpin its semantic flexibility. To mitigate the combinatorial explosion induced by the semantic relaxation of queries, our method is built on two key algorithmic ideas: dynamic corpus-aware pruning and fast exact lookup"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.10908","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-02-11T14:40:15Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"7cf8652e0925c7424a2c19ae43b1d3272769d110ced8619fad5ebb962d52f99c","abstract_canon_sha256":"ef821c678316925fb424024b96e5b52a5f0cabd8bae71e6e3efad1a60037ac72"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:10:33.188790Z","signature_b64":"Erjg/Qsl5N0t073GO6QorcbHnQTZ0q+1mpfFJuNvbk00KGXCwDXbQ9MrmiTQj2duE/hCMVmy+pKzutbXXpwfBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a9e6e941dc87e4991ee585e871630c16e87c60ec750997a4e8724fd72505c993","last_reissued_at":"2026-06-11T01:10:33.187597Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:10:33.187597Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SoftMatcha 2: A Fast and Soft Pattern Matcher for Trillion-Scale Corpora","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Go kamoda, Kohei Suenaga, Masaki Waga, Masataka Yoneda, Sho Yokoi, Takuya Akiba, Yusuke Matsushita","submitted_at":"2026-02-11T14:40:15Z","abstract_excerpt":"We present SoftMatcha 2, an ultra-fast and flexible search algorithm that enables search over trillion-scale natural language corpora in under 0.3 seconds while allowing semantic variations in the form of substitution, insertion, and deletion. Our approach employs string matching based on suffix arrays that scales well with corpus size, and represents words as vectors, which underpin its semantic flexibility. To mitigate the combinatorial explosion induced by the semantic relaxation of queries, our method is built on two key algorithmic ideas: dynamic corpus-aware pruning and fast exact lookup"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.10908","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.10908/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.10908","created_at":"2026-06-11T01:10:33.187812+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.10908v2","created_at":"2026-06-11T01:10:33.187812+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.10908","created_at":"2026-06-11T01:10:33.187812+00:00"},{"alias_kind":"pith_short_12","alias_value":"VHTOSQO4Q7SJ","created_at":"2026-06-11T01:10:33.187812+00:00"},{"alias_kind":"pith_short_16","alias_value":"VHTOSQO4Q7SJSHXF","created_at":"2026-06-11T01:10:33.187812+00:00"},{"alias_kind":"pith_short_8","alias_value":"VHTOSQO4","created_at":"2026-06-11T01:10:33.187812+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3","json":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3.json","graph_json":"https://pith.science/api/pith-number/VHTOSQO4Q7SJSHXFQXUHCYYMC3/graph.json","events_json":"https://pith.science/api/pith-number/VHTOSQO4Q7SJSHXFQXUHCYYMC3/events.json","paper":"https://pith.science/paper/VHTOSQO4"},"agent_actions":{"view_html":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3","download_json":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3.json","view_paper":"https://pith.science/paper/VHTOSQO4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.10908&json=true","fetch_graph":"https://pith.science/api/pith-number/VHTOSQO4Q7SJSHXFQXUHCYYMC3/graph.json","fetch_events":"https://pith.science/api/pith-number/VHTOSQO4Q7SJSHXFQXUHCYYMC3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3/action/storage_attestation","attest_author":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3/action/author_attestation","sign_citation":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3/action/citation_signature","submit_replication":"https://pith.science/pith/VHTOSQO4Q7SJSHXFQXUHCYYMC3/action/replication_record"}},"created_at":"2026-06-11T01:10:33.187812+00:00","updated_at":"2026-06-11T01:10:33.187812+00:00"}