{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:7YFZF6XZ3UP5OTCA6ZU6PCDZX3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cfdc4920b80fa3e8e753a3d0ec427cd4db24256fc0b55c671a19f9d10e07687f","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DL","submitted_at":"2015-12-15T23:07:58Z","title_canon_sha256":"a006b058ba94a2d63936e1f7995c7eb55e11e79fe843347afe9466c68dba0a70"},"schema_version":"1.0","source":{"id":"1512.05004","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.05004","created_at":"2026-05-18T00:50:57Z"},{"alias_kind":"arxiv_version","alias_value":"1512.05004v3","created_at":"2026-05-18T00:50:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.05004","created_at":"2026-05-18T00:50:57Z"},{"alias_kind":"pith_short_12","alias_value":"7YFZF6XZ3UP5","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_16","alias_value":"7YFZF6XZ3UP5OTCA","created_at":"2026-05-18T12:29:10Z"},{"alias_kind":"pith_short_8","alias_value":"7YFZF6XZ","created_at":"2026-05-18T12:29:10Z"}],"graph_snapshots":[{"event_id":"sha256:532c9d4f8752c1907f8c58cf691eca97d3089112380be8d8676a5239e3d97cfc","target":"graph","created_at":"2026-05-18T00:50:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Cultural-scale models of full text documents are prone to over-interpretation by researchers making unintentionally strong socio-linguistic claims (Pechenick et al., 2015) without recognizing that even large digital libraries are merely samples of all the books ever produced. In this study, we test the sensitivity of the topic models to the sampling process by taking random samples of books in the Hathi Trust Digital Library from different areas of the Library of Congress Classification Outline. For each classification area, we train several topic models over the entire class with different ra","authors_text":"Colin Allen, Jaimie Murdock, Jiaan Zeng","cross_cats":["cs.CL","cs.IR"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DL","submitted_at":"2015-12-15T23:07:58Z","title":"Towards Evaluation of Cultural-scale Claims in Light of Topic Model Sampling Effects"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.05004","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:149338ec17e8a51949d321bc747857e8f6de0bb138236a444915d2515badeae3","target":"record","created_at":"2026-05-18T00:50:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cfdc4920b80fa3e8e753a3d0ec427cd4db24256fc0b55c671a19f9d10e07687f","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DL","submitted_at":"2015-12-15T23:07:58Z","title_canon_sha256":"a006b058ba94a2d63936e1f7995c7eb55e11e79fe843347afe9466c68dba0a70"},"schema_version":"1.0","source":{"id":"1512.05004","kind":"arxiv","version":3}},"canonical_sha256":"fe0b92faf9dd1fd74c40f669e78879bece6d53aa8506aa5e92b10c064b6e9452","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fe0b92faf9dd1fd74c40f669e78879bece6d53aa8506aa5e92b10c064b6e9452","first_computed_at":"2026-05-18T00:50:57.538870Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:50:57.538870Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LfW/qewYE8TaJ+I2q9KxoRST/H7FHJxA9KCYkJNd7XVWyz/g27NlsI0fks0j1v2W1lXK7v9qKmj5VOfBNWqFDA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:50:57.539492Z","signed_message":"canonical_sha256_bytes"},"source_id":"1512.05004","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:149338ec17e8a51949d321bc747857e8f6de0bb138236a444915d2515badeae3","sha256:532c9d4f8752c1907f8c58cf691eca97d3089112380be8d8676a5239e3d97cfc"],"state_sha256":"cc5cb6658ab8b351f661d5b787ee3afa427a1ee9a1abcf54f50017c93793b968"}