{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:OOJLQ7REYFA7SBC5NNBCUUT2RN","short_pith_number":"pith:OOJLQ7RE","canonical_record":{"source":{"id":"1904.06941","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-04-15T10:06:47Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"1679f9249a6e542edd52cef724b262312ff51315daf064ca64546c677634b9d8","abstract_canon_sha256":"b769b4413f0d1a44f6903f9933797a747b26257fe7dc1191ea90d8abc95e0544"},"schema_version":"1.0"},"canonical_sha256":"7392b87e24c141f9045d6b422a527a8b4de26ef89ccdedc0c8dfdb5ee914d00c","source":{"kind":"arxiv","id":"1904.06941","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.06941","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"arxiv_version","alias_value":"1904.06941v1","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.06941","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"pith_short_12","alias_value":"OOJLQ7REYFA7","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"OOJLQ7REYFA7SBC5","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"OOJLQ7RE","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:OOJLQ7REYFA7SBC5NNBCUUT2RN","target":"record","payload":{"canonical_record":{"source":{"id":"1904.06941","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-04-15T10:06:47Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"1679f9249a6e542edd52cef724b262312ff51315daf064ca64546c677634b9d8","abstract_canon_sha256":"b769b4413f0d1a44f6903f9933797a747b26257fe7dc1191ea90d8abc95e0544"},"schema_version":"1.0"},"canonical_sha256":"7392b87e24c141f9045d6b422a527a8b4de26ef89ccdedc0c8dfdb5ee914d00c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:35.694783Z","signature_b64":"LN70llA5Iw2HbdEeA2x2/7c0bOBT2zxY3U2qWgjvZBDlIGQleX0HG2PGiAu4By4WgMgeAWM6yRibtFvzkS5MAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7392b87e24c141f9045d6b422a527a8b4de26ef89ccdedc0c8dfdb5ee914d00c","last_reissued_at":"2026-05-17T23:48:35.694244Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:35.694244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.06941","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tSle6jAyreBKFcLlNSZdpxiNkKjoXNJZxpExTREcYkWCx/mtjOSCDtNnHfPAZXWrcZ7bK7BW7CuQqh80YCo+Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T07:05:33.589918Z"},"content_sha256":"c6e85dbd5797002ab2f230a3a299ea784975727f0e5e70e082568d731f409a2b","schema_version":"1.0","event_id":"sha256:c6e85dbd5797002ab2f230a3a299ea784975727f0e5e70e082568d731f409a2b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:OOJLQ7REYFA7SBC5NNBCUUT2RN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A framework for streamlined statistical prediction using topic models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"stat.AP","authors_text":"Jonathan Tuke, Lewis Mitchell, Nigel Bean, Vanessa Glenny","submitted_at":"2019-04-15T10:06:47Z","abstract_excerpt":"In the Humanities and Social Sciences, there is increasing interest in approaches to information extraction, prediction, intelligent linkage, and dimension reduction applicable to large text corpora. With approaches in these fields being grounded in traditional statistical techniques, the need arises for frameworks whereby advanced NLP techniques such as topic modelling may be incorporated within classical methodologies. This paper provides a classical, supervised, statistical learning framework for prediction from text, using topic models as a data reduction method and the topics themselves a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.06941","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:48:35Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oki992BeLWOWIrbMBNjUrRj9AtwcZIL9itTfbC77NGuShtXGesqD4A9QocXCPMq0ahppWFnrU+DTCAn3kHP/Cw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T07:05:33.590589Z"},"content_sha256":"6f7857ce6de46f2b883b5e6a0bf017e48314592cb94c506fa9f139cc65ead65a","schema_version":"1.0","event_id":"sha256:6f7857ce6de46f2b883b5e6a0bf017e48314592cb94c506fa9f139cc65ead65a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/bundle.json","state_url":"https://pith.science/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T07:05:33Z","links":{"resolver":"https://pith.science/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN","bundle":"https://pith.science/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/bundle.json","state":"https://pith.science/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OOJLQ7REYFA7SBC5NNBCUUT2RN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:OOJLQ7REYFA7SBC5NNBCUUT2RN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b769b4413f0d1a44f6903f9933797a747b26257fe7dc1191ea90d8abc95e0544","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-04-15T10:06:47Z","title_canon_sha256":"1679f9249a6e542edd52cef724b262312ff51315daf064ca64546c677634b9d8"},"schema_version":"1.0","source":{"id":"1904.06941","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.06941","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"arxiv_version","alias_value":"1904.06941v1","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.06941","created_at":"2026-05-17T23:48:35Z"},{"alias_kind":"pith_short_12","alias_value":"OOJLQ7REYFA7","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"OOJLQ7REYFA7SBC5","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"OOJLQ7RE","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:6f7857ce6de46f2b883b5e6a0bf017e48314592cb94c506fa9f139cc65ead65a","target":"graph","created_at":"2026-05-17T23:48:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In the Humanities and Social Sciences, there is increasing interest in approaches to information extraction, prediction, intelligent linkage, and dimension reduction applicable to large text corpora. With approaches in these fields being grounded in traditional statistical techniques, the need arises for frameworks whereby advanced NLP techniques such as topic modelling may be incorporated within classical methodologies. This paper provides a classical, supervised, statistical learning framework for prediction from text, using topic models as a data reduction method and the topics themselves a","authors_text":"Jonathan Tuke, Lewis Mitchell, Nigel Bean, Vanessa Glenny","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-04-15T10:06:47Z","title":"A framework for streamlined statistical prediction using topic models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.06941","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c6e85dbd5797002ab2f230a3a299ea784975727f0e5e70e082568d731f409a2b","target":"record","created_at":"2026-05-17T23:48:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b769b4413f0d1a44f6903f9933797a747b26257fe7dc1191ea90d8abc95e0544","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.AP","submitted_at":"2019-04-15T10:06:47Z","title_canon_sha256":"1679f9249a6e542edd52cef724b262312ff51315daf064ca64546c677634b9d8"},"schema_version":"1.0","source":{"id":"1904.06941","kind":"arxiv","version":1}},"canonical_sha256":"7392b87e24c141f9045d6b422a527a8b4de26ef89ccdedc0c8dfdb5ee914d00c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7392b87e24c141f9045d6b422a527a8b4de26ef89ccdedc0c8dfdb5ee914d00c","first_computed_at":"2026-05-17T23:48:35.694244Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:48:35.694244Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LN70llA5Iw2HbdEeA2x2/7c0bOBT2zxY3U2qWgjvZBDlIGQleX0HG2PGiAu4By4WgMgeAWM6yRibtFvzkS5MAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:48:35.694783Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.06941","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c6e85dbd5797002ab2f230a3a299ea784975727f0e5e70e082568d731f409a2b","sha256:6f7857ce6de46f2b883b5e6a0bf017e48314592cb94c506fa9f139cc65ead65a"],"state_sha256":"927f715e805ea1713e35f754f8db34345a7178db5142f40fcf0ece4788c55b4f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Fb3QNvn6/+HbikGsRp0Le/GNNK+TIzPNXJYYRkEYdwgqBbvrJvFGcyRTNiGbf7BGAfEacVxsK5Yoq0MnRkwECA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T07:05:33.594243Z","bundle_sha256":"b6de6736a9c6bac2ff72310836baf080f969468e14d1b55120ebdf935527495f"}}