{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:VXCCFQD6MFQASS2BG57JTAKX2E","short_pith_number":"pith:VXCCFQD6","schema_version":"1.0","canonical_sha256":"adc422c07e6160094b41377e998157d12df3ba60cb90841004af928379fd2118","source":{"kind":"arxiv","id":"1603.07150","version":1},"attestation_state":"computed","paper":{"title":"The Anatomy of a Search and Mining System for Digital Archives","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR"],"primary_cat":"cs.DL","authors_text":"Dan Levene, Dell Zhang, Mark Levene, Martyn Harris","submitted_at":"2016-03-23T12:02:12Z","abstract_excerpt":"Samtla (Search And Mining Tools with Linguistic Analysis) is a digital humanities system designed in collaboration with historians and linguists to assist them with their research work in quantifying the content of any textual corpora through approximate phrase search and document comparison. The retrieval engine uses a character-based n-gram language model rather than the conventional word-based one so as to achieve great flexibility in language agnostic query processing.\n  The index is implemented as a space-optimised character-based suffix tree with an accompanying database of document cont"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1603.07150","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2016-03-23T12:02:12Z","cross_cats_sorted":["cs.CL","cs.IR"],"title_canon_sha256":"5202f3ac6064f3893d8aceaaf4a82bf8a06d8adf25a9d8ea22ba0b9a041f7034","abstract_canon_sha256":"848967f263187601a2c99620271168e1998e7a7cfd0e8482aa46bed399f1ed6a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:18:24.267870Z","signature_b64":"armSaK9qQvNbP5bX++682wbrb9k8j++ixT5xkXeRfK5t1M5pNTXmTjREm9UDJE/Xnkgfcy7Od2+gjTtsTPVHCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"adc422c07e6160094b41377e998157d12df3ba60cb90841004af928379fd2118","last_reissued_at":"2026-05-18T01:18:24.267201Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:18:24.267201Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Anatomy of a Search and Mining System for Digital Archives","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR"],"primary_cat":"cs.DL","authors_text":"Dan Levene, Dell Zhang, Mark Levene, Martyn Harris","submitted_at":"2016-03-23T12:02:12Z","abstract_excerpt":"Samtla (Search And Mining Tools with Linguistic Analysis) is a digital humanities system designed in collaboration with historians and linguists to assist them with their research work in quantifying the content of any textual corpora through approximate phrase search and document comparison. The retrieval engine uses a character-based n-gram language model rather than the conventional word-based one so as to achieve great flexibility in language agnostic query processing.\n  The index is implemented as a space-optimised character-based suffix tree with an accompanying database of document cont"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.07150","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1603.07150","created_at":"2026-05-18T01:18:24.267297+00:00"},{"alias_kind":"arxiv_version","alias_value":"1603.07150v1","created_at":"2026-05-18T01:18:24.267297+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.07150","created_at":"2026-05-18T01:18:24.267297+00:00"},{"alias_kind":"pith_short_12","alias_value":"VXCCFQD6MFQA","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_16","alias_value":"VXCCFQD6MFQASS2B","created_at":"2026-05-18T12:30:48.956258+00:00"},{"alias_kind":"pith_short_8","alias_value":"VXCCFQD6","created_at":"2026-05-18T12:30:48.956258+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E","json":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E.json","graph_json":"https://pith.science/api/pith-number/VXCCFQD6MFQASS2BG57JTAKX2E/graph.json","events_json":"https://pith.science/api/pith-number/VXCCFQD6MFQASS2BG57JTAKX2E/events.json","paper":"https://pith.science/paper/VXCCFQD6"},"agent_actions":{"view_html":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E","download_json":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E.json","view_paper":"https://pith.science/paper/VXCCFQD6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1603.07150&json=true","fetch_graph":"https://pith.science/api/pith-number/VXCCFQD6MFQASS2BG57JTAKX2E/graph.json","fetch_events":"https://pith.science/api/pith-number/VXCCFQD6MFQASS2BG57JTAKX2E/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E/action/storage_attestation","attest_author":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E/action/author_attestation","sign_citation":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E/action/citation_signature","submit_replication":"https://pith.science/pith/VXCCFQD6MFQASS2BG57JTAKX2E/action/replication_record"}},"created_at":"2026-05-18T01:18:24.267297+00:00","updated_at":"2026-05-18T01:18:24.267297+00:00"}