{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:53LTHGWGCAERV4OIXSG3OIRWHA","short_pith_number":"pith:53LTHGWG","schema_version":"1.0","canonical_sha256":"eed7339ac610091af1c8bc8db7223638126fef9cb4710e1259504e0a0d0ad234","source":{"kind":"arxiv","id":"1508.03772","version":1},"attestation_state":"computed","paper":{"title":"Probabilistic, statistical and algorithmic aspects of the similarity of texts and application to Gospels comparison","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.PR"],"primary_cat":"stat.ME","authors_text":"Gane Samb Lo, Soumaila Dembele","submitted_at":"2015-08-15T22:12:22Z","abstract_excerpt":"The fundamental problem of similarity studies, in the frame of data-mining, is to examine and detect similar items in articles, papers, books, with huge sizes. In this paper, we are interested in the probabilistic, and the statistical and the algorithmic aspects in studies of texts. We will be using the approach of $k$\\textit{-shinglings}, a $k$\\textit{-shingling} being defined as a sequence of $k$ consecutive characters that are extracted from a text ($k\\geq 1$ ). The main stake in this field is to find accurate and quick algorithms to compute the similarity in short times. This will be achie"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1508.03772","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2015-08-15T22:12:22Z","cross_cats_sorted":["math.PR"],"title_canon_sha256":"45794db1a19e3dbee5248dc19ad17cc31137d8639719c510a4af99871cd172a8","abstract_canon_sha256":"214d7d72a9307ac3d3d8d4b73129cbb25deec5be5f9cbadff5537d3345e91eed"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:35:15.265258Z","signature_b64":"2FXD0k8tFYEtclihSYX6KqExw3ipRvtB0YejIItLlhAOYyOqgIewsdiD8s81+kri1BdQkV4hzafCwGetvKMbCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eed7339ac610091af1c8bc8db7223638126fef9cb4710e1259504e0a0d0ad234","last_reissued_at":"2026-05-18T01:35:15.264681Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:35:15.264681Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Probabilistic, statistical and algorithmic aspects of the similarity of texts and application to Gospels comparison","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.PR"],"primary_cat":"stat.ME","authors_text":"Gane Samb Lo, Soumaila Dembele","submitted_at":"2015-08-15T22:12:22Z","abstract_excerpt":"The fundamental problem of similarity studies, in the frame of data-mining, is to examine and detect similar items in articles, papers, books, with huge sizes. In this paper, we are interested in the probabilistic, and the statistical and the algorithmic aspects in studies of texts. We will be using the approach of $k$\\textit{-shinglings}, a $k$\\textit{-shingling} being defined as a sequence of $k$ consecutive characters that are extracted from a text ($k\\geq 1$ ). The main stake in this field is to find accurate and quick algorithms to compute the similarity in short times. This will be achie"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1508.03772","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1508.03772","created_at":"2026-05-18T01:35:15.264781+00:00"},{"alias_kind":"arxiv_version","alias_value":"1508.03772v1","created_at":"2026-05-18T01:35:15.264781+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1508.03772","created_at":"2026-05-18T01:35:15.264781+00:00"},{"alias_kind":"pith_short_12","alias_value":"53LTHGWGCAER","created_at":"2026-05-18T12:29:05.191682+00:00"},{"alias_kind":"pith_short_16","alias_value":"53LTHGWGCAERV4OI","created_at":"2026-05-18T12:29:05.191682+00:00"},{"alias_kind":"pith_short_8","alias_value":"53LTHGWG","created_at":"2026-05-18T12:29:05.191682+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA","json":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA.json","graph_json":"https://pith.science/api/pith-number/53LTHGWGCAERV4OIXSG3OIRWHA/graph.json","events_json":"https://pith.science/api/pith-number/53LTHGWGCAERV4OIXSG3OIRWHA/events.json","paper":"https://pith.science/paper/53LTHGWG"},"agent_actions":{"view_html":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA","download_json":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA.json","view_paper":"https://pith.science/paper/53LTHGWG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1508.03772&json=true","fetch_graph":"https://pith.science/api/pith-number/53LTHGWGCAERV4OIXSG3OIRWHA/graph.json","fetch_events":"https://pith.science/api/pith-number/53LTHGWGCAERV4OIXSG3OIRWHA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA/action/storage_attestation","attest_author":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA/action/author_attestation","sign_citation":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA/action/citation_signature","submit_replication":"https://pith.science/pith/53LTHGWGCAERV4OIXSG3OIRWHA/action/replication_record"}},"created_at":"2026-05-18T01:35:15.264781+00:00","updated_at":"2026-05-18T01:35:15.264781+00:00"}