{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:4O2AC6JQUAWACGAG2NXFFFWDQX","short_pith_number":"pith:4O2AC6JQ","schema_version":"1.0","canonical_sha256":"e3b4017930a02c011806d36e5296c385efd9f68a624f13109b6443d59b7067f5","source":{"kind":"arxiv","id":"1712.03190","version":1},"attestation_state":"computed","paper":{"title":"A Method for Finding Similar Documents Relying on Adding Repetition of Symbols in Length Based Filtering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Hossein Azgomi, Masoud Mohammadi, Masumeh Ghasemi Mahsayeh, Milad Moradi","submitted_at":"2017-12-08T17:32:45Z","abstract_excerpt":"A basic topic in mining of massive dataset is finding similar items. As an example, finding similar documents can be recommended. In this case many methods are existed. For example, Shingling method and length based filtering are one of them. In Shingling method, from each document, substrings have been selected with symbol name and, they are placed on one set. For finding similar documents, the similarities of sets that related with them have been calculated. In Length based filtering just documents which close these lengths have been compared. These methods don't consider repetition of symbo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.03190","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-12-08T17:32:45Z","cross_cats_sorted":[],"title_canon_sha256":"afe2f437aa0a632e0f304474c53b8b55249d34983c45e1b08e22dd5212e32f5f","abstract_canon_sha256":"6aa61b5aa780de53c3c2315177fb07cbebcb7fa71f07a5edbc7352d75adf6acb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:01.502992Z","signature_b64":"g9sf+F3a6KoSOKp4VavIia83NelOQpXsje7gTaszR8pJr1VtqNTjxz9UCjDtZSJpWxC3KkECl6/OYqLFG1tZCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e3b4017930a02c011806d36e5296c385efd9f68a624f13109b6443d59b7067f5","last_reissued_at":"2026-05-18T00:28:01.502190Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:01.502190Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Method for Finding Similar Documents Relying on Adding Repetition of Symbols in Length Based Filtering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Hossein Azgomi, Masoud Mohammadi, Masumeh Ghasemi Mahsayeh, Milad Moradi","submitted_at":"2017-12-08T17:32:45Z","abstract_excerpt":"A basic topic in mining of massive dataset is finding similar items. As an example, finding similar documents can be recommended. In this case many methods are existed. For example, Shingling method and length based filtering are one of them. In Shingling method, from each document, substrings have been selected with symbol name and, they are placed on one set. For finding similar documents, the similarities of sets that related with them have been calculated. In Length based filtering just documents which close these lengths have been compared. These methods don't consider repetition of symbo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.03190","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.03190","created_at":"2026-05-18T00:28:01.502324+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.03190v1","created_at":"2026-05-18T00:28:01.502324+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.03190","created_at":"2026-05-18T00:28:01.502324+00:00"},{"alias_kind":"pith_short_12","alias_value":"4O2AC6JQUAWA","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_16","alias_value":"4O2AC6JQUAWACGAG","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_8","alias_value":"4O2AC6JQ","created_at":"2026-05-18T12:31:00.734936+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX","json":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX.json","graph_json":"https://pith.science/api/pith-number/4O2AC6JQUAWACGAG2NXFFFWDQX/graph.json","events_json":"https://pith.science/api/pith-number/4O2AC6JQUAWACGAG2NXFFFWDQX/events.json","paper":"https://pith.science/paper/4O2AC6JQ"},"agent_actions":{"view_html":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX","download_json":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX.json","view_paper":"https://pith.science/paper/4O2AC6JQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.03190&json=true","fetch_graph":"https://pith.science/api/pith-number/4O2AC6JQUAWACGAG2NXFFFWDQX/graph.json","fetch_events":"https://pith.science/api/pith-number/4O2AC6JQUAWACGAG2NXFFFWDQX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX/action/storage_attestation","attest_author":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX/action/author_attestation","sign_citation":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX/action/citation_signature","submit_replication":"https://pith.science/pith/4O2AC6JQUAWACGAG2NXFFFWDQX/action/replication_record"}},"created_at":"2026-05-18T00:28:01.502324+00:00","updated_at":"2026-05-18T00:28:01.502324+00:00"}