{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:YIW7HDCX6EBGXBOEZMHMQ5YYNF","short_pith_number":"pith:YIW7HDCX","schema_version":"1.0","canonical_sha256":"c22df38c57f1026b85c4cb0ec87718696bb2a8a3c495566765f4b7811921a059","source":{"kind":"arxiv","id":"1905.03836","version":1},"attestation_state":"computed","paper":{"title":"Collecting 16K archived web pages from 17 public web archives","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DL","authors_text":"Herbert Van de Sompel, Martin Klein, Michael L. Nelson, Michele C. Weigle, Mohamed Aturban","submitted_at":"2019-05-09T20:00:23Z","abstract_excerpt":"We document the creation of a data set of 16,627 archived web pages, or mementos, of 3,698 unique live web URIs (Uniform Resource Identifiers) from 17 public web archives. We used four different methods to collect the dataset. First, we used the Los Alamos National Laboratory (LANL) Memento Aggregator to collect mementos of an initial set of URIs obtained from four sources: (a) the Moz Top 500, (b) the dataset used in our previous study, (c) the HTTP Archive, and (d) the Web Archives for Historical Research group. Second, we extracted URIs from the HTML of already collected mementos. These URI"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1905.03836","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2019-05-09T20:00:23Z","cross_cats_sorted":[],"title_canon_sha256":"caabc0cdfa813acf510b147348a86f902e6a7186a07da1a633bccf84ed767917","abstract_canon_sha256":"0043abdecd626d65526d56eea407ded0860682bff151a63c66161fe11674b65f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:46:35.485072Z","signature_b64":"2Kqqw9OCvEis7M+Ge5orkJ7zVmxxVy0eGYX8KubkX5xAEveP8jabVy0r237iNbrhURYZrv7Nl9gMpwQJZBZABA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c22df38c57f1026b85c4cb0ec87718696bb2a8a3c495566765f4b7811921a059","last_reissued_at":"2026-05-17T23:46:35.484475Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:46:35.484475Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Collecting 16K archived web pages from 17 public web archives","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DL","authors_text":"Herbert Van de Sompel, Martin Klein, Michael L. Nelson, Michele C. Weigle, Mohamed Aturban","submitted_at":"2019-05-09T20:00:23Z","abstract_excerpt":"We document the creation of a data set of 16,627 archived web pages, or mementos, of 3,698 unique live web URIs (Uniform Resource Identifiers) from 17 public web archives. We used four different methods to collect the dataset. First, we used the Los Alamos National Laboratory (LANL) Memento Aggregator to collect mementos of an initial set of URIs obtained from four sources: (a) the Moz Top 500, (b) the dataset used in our previous study, (c) the HTTP Archive, and (d) the Web Archives for Historical Research group. Second, we extracted URIs from the HTML of already collected mementos. These URI"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.03836","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.03836","created_at":"2026-05-17T23:46:35.484571+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.03836v1","created_at":"2026-05-17T23:46:35.484571+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.03836","created_at":"2026-05-17T23:46:35.484571+00:00"},{"alias_kind":"pith_short_12","alias_value":"YIW7HDCX6EBG","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"YIW7HDCX6EBGXBOE","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"YIW7HDCX","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF","json":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF.json","graph_json":"https://pith.science/api/pith-number/YIW7HDCX6EBGXBOEZMHMQ5YYNF/graph.json","events_json":"https://pith.science/api/pith-number/YIW7HDCX6EBGXBOEZMHMQ5YYNF/events.json","paper":"https://pith.science/paper/YIW7HDCX"},"agent_actions":{"view_html":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF","download_json":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF.json","view_paper":"https://pith.science/paper/YIW7HDCX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.03836&json=true","fetch_graph":"https://pith.science/api/pith-number/YIW7HDCX6EBGXBOEZMHMQ5YYNF/graph.json","fetch_events":"https://pith.science/api/pith-number/YIW7HDCX6EBGXBOEZMHMQ5YYNF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF/action/storage_attestation","attest_author":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF/action/author_attestation","sign_citation":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF/action/citation_signature","submit_replication":"https://pith.science/pith/YIW7HDCX6EBGXBOEZMHMQ5YYNF/action/replication_record"}},"created_at":"2026-05-17T23:46:35.484571+00:00","updated_at":"2026-05-17T23:46:35.484571+00:00"}