{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:GEF76O7U2VHN6PVQCKETYA4X42","short_pith_number":"pith:GEF76O7U","canonical_record":{"source":{"id":"1703.08244","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T22:20:45Z","cross_cats_sorted":[],"title_canon_sha256":"151bdc1d7cb5df23b9aa66d034978a683c499b0dae111f108123ce8f4f7e56d2","abstract_canon_sha256":"6b09a41047c3197c3bceeeeef8cbddd0719d56acb85896705b9b5e8ad9800a86"},"schema_version":"1.0"},"canonical_sha256":"310bff3bf4d54edf3eb012893c0397e696760c7e3e9a483a74e517fc06d95870","source":{"kind":"arxiv","id":"1703.08244","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08244","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08244v1","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08244","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"pith_short_12","alias_value":"GEF76O7U2VHN","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GEF76O7U2VHN6PVQ","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GEF76O7U","created_at":"2026-05-18T12:31:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:GEF76O7U2VHN6PVQCKETYA4X42","target":"record","payload":{"canonical_record":{"source":{"id":"1703.08244","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T22:20:45Z","cross_cats_sorted":[],"title_canon_sha256":"151bdc1d7cb5df23b9aa66d034978a683c499b0dae111f108123ce8f4f7e56d2","abstract_canon_sha256":"6b09a41047c3197c3bceeeeef8cbddd0719d56acb85896705b9b5e8ad9800a86"},"schema_version":"1.0"},"canonical_sha256":"310bff3bf4d54edf3eb012893c0397e696760c7e3e9a483a74e517fc06d95870","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:48:00.381633Z","signature_b64":"X6yfr5giZbCfxT4Olg3fOMDG7/4jNVE7FwoPPc73YawMwBsTGmj7ATjrSGdaWH1rbQU9pSX6WjCYMv4joGQABQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"310bff3bf4d54edf3eb012893c0397e696760c7e3e9a483a74e517fc06d95870","last_reissued_at":"2026-05-18T00:48:00.381176Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:48:00.381176Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.08244","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:48:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1bVXuel8QrP/oLpB6P1ASgY6RTAL62Ej+zqZdIac39m4tXcdvdqC11JEBEb56hoFs8C9EXaZjGiMn/NkFOOBDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T22:44:07.422985Z"},"content_sha256":"a75a49c7d1e1daf8b001bca5d346af3064663d5b0edaaf5ca0eae229fdc8acc6","schema_version":"1.0","event_id":"sha256:a75a49c7d1e1daf8b001bca5d346af3064663d5b0edaaf5ca0eae229fdc8acc6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:GEF76O7U2VHN6PVQCKETYA4X42","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"TokTrack: A Complete Token Provenance and Change Tracking Dataset for the English Wikipedia","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fabian Fl\\\"ock, Kenan Erdogan, Maribel Acosta","submitted_at":"2017-03-23T22:20:45Z","abstract_excerpt":"We present a dataset that contains every instance of all tokens (~ words) ever written in undeleted, non-redirect English Wikipedia articles until October 2016, in total 13,545,349,787 instances. Each token is annotated with (i) the article revision it was originally created in, and (ii) lists with all the revisions in which the token was ever deleted and (potentially) re-added and re-deleted from its article, enabling a complete and straightforward tracking of its history. This data would be exceedingly hard to create by an average potential user as it is (i) very expensive to compute and as "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08244","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:48:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aPFONvNLyEVwuDgsaG0KRjpaJU8x5WKPx8STODGFhXeko8JdCArcfFoX1E87ksO3aVjsquWQtM+H07m9BR8tDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T22:44:07.423453Z"},"content_sha256":"ede25819f7acdb4e8b83bb818ce252ce03fab9bed72c7ece773f5534938dc57f","schema_version":"1.0","event_id":"sha256:ede25819f7acdb4e8b83bb818ce252ce03fab9bed72c7ece773f5534938dc57f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GEF76O7U2VHN6PVQCKETYA4X42/bundle.json","state_url":"https://pith.science/pith/GEF76O7U2VHN6PVQCKETYA4X42/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GEF76O7U2VHN6PVQCKETYA4X42/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T22:44:07Z","links":{"resolver":"https://pith.science/pith/GEF76O7U2VHN6PVQCKETYA4X42","bundle":"https://pith.science/pith/GEF76O7U2VHN6PVQCKETYA4X42/bundle.json","state":"https://pith.science/pith/GEF76O7U2VHN6PVQCKETYA4X42/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GEF76O7U2VHN6PVQCKETYA4X42/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:GEF76O7U2VHN6PVQCKETYA4X42","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6b09a41047c3197c3bceeeeef8cbddd0719d56acb85896705b9b5e8ad9800a86","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T22:20:45Z","title_canon_sha256":"151bdc1d7cb5df23b9aa66d034978a683c499b0dae111f108123ce8f4f7e56d2"},"schema_version":"1.0","source":{"id":"1703.08244","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08244","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08244v1","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08244","created_at":"2026-05-18T00:48:00Z"},{"alias_kind":"pith_short_12","alias_value":"GEF76O7U2VHN","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_16","alias_value":"GEF76O7U2VHN6PVQ","created_at":"2026-05-18T12:31:15Z"},{"alias_kind":"pith_short_8","alias_value":"GEF76O7U","created_at":"2026-05-18T12:31:15Z"}],"graph_snapshots":[{"event_id":"sha256:ede25819f7acdb4e8b83bb818ce252ce03fab9bed72c7ece773f5534938dc57f","target":"graph","created_at":"2026-05-18T00:48:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present a dataset that contains every instance of all tokens (~ words) ever written in undeleted, non-redirect English Wikipedia articles until October 2016, in total 13,545,349,787 instances. Each token is annotated with (i) the article revision it was originally created in, and (ii) lists with all the revisions in which the token was ever deleted and (potentially) re-added and re-deleted from its article, enabling a complete and straightforward tracking of its history. This data would be exceedingly hard to create by an average potential user as it is (i) very expensive to compute and as ","authors_text":"Fabian Fl\\\"ock, Kenan Erdogan, Maribel Acosta","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T22:20:45Z","title":"TokTrack: A Complete Token Provenance and Change Tracking Dataset for the English Wikipedia"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08244","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a75a49c7d1e1daf8b001bca5d346af3064663d5b0edaaf5ca0eae229fdc8acc6","target":"record","created_at":"2026-05-18T00:48:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6b09a41047c3197c3bceeeeef8cbddd0719d56acb85896705b9b5e8ad9800a86","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T22:20:45Z","title_canon_sha256":"151bdc1d7cb5df23b9aa66d034978a683c499b0dae111f108123ce8f4f7e56d2"},"schema_version":"1.0","source":{"id":"1703.08244","kind":"arxiv","version":1}},"canonical_sha256":"310bff3bf4d54edf3eb012893c0397e696760c7e3e9a483a74e517fc06d95870","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"310bff3bf4d54edf3eb012893c0397e696760c7e3e9a483a74e517fc06d95870","first_computed_at":"2026-05-18T00:48:00.381176Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:48:00.381176Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"X6yfr5giZbCfxT4Olg3fOMDG7/4jNVE7FwoPPc73YawMwBsTGmj7ATjrSGdaWH1rbQU9pSX6WjCYMv4joGQABQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:48:00.381633Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.08244","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a75a49c7d1e1daf8b001bca5d346af3064663d5b0edaaf5ca0eae229fdc8acc6","sha256:ede25819f7acdb4e8b83bb818ce252ce03fab9bed72c7ece773f5534938dc57f"],"state_sha256":"7feded5d6a81c7ba3a7d1f80a2e546239afdb376e411ba16217485d2d5e43547"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LNXocbur7uXyrG4pEM/M6g61YFUWl6oYPGZx40W6fJ8mq2S9fKVfsOCzj/ykii65ZmceDMcyn4HDifU7X9VeBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T22:44:07.425655Z","bundle_sha256":"c6ca806e84e77ef622afc341ad2d84c530d82ce3878aa050a7d5deb56b897c32"}}