{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:7K3MCBBFCYYB6WZ4SDAKMHMYUD","short_pith_number":"pith:7K3MCBBF","schema_version":"1.0","canonical_sha256":"fab6c1042516301f5b3c90c0a61d98a0c16a66fc91c23c8cace42787284b29b5","source":{"kind":"arxiv","id":"1208.3530","version":1},"attestation_state":"computed","paper":{"title":"Leveraging Subjective Human Annotation for Clustering Historic Newspaper Articles","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.DL"],"primary_cat":"cs.IR","authors_text":"Austin Lee, Axinia Radeva, Barbara Taranto, Boyi Xie, Deepak Shankargouda, Haimonti Dutta, Kyle Rego, Manoj Pooleery, Rebecca Passonneau, William Chan","submitted_at":"2012-08-17T04:48:58Z","abstract_excerpt":"The New York Public Library is participating in the Chronicling America initiative to develop an online searchable database of historically significant newspaper articles. Microfilm copies of the newspapers are scanned and high resolution Optical Character Recognition (OCR) software is run on them. The text from the OCR provides a wealth of data and opinion for researchers and historians. However, categorization of articles provided by the OCR engine is rudimentary and a large number of the articles are labeled editorial without further grouping. Manually sorting articles into fine-grained cat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1208.3530","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-08-17T04:48:58Z","cross_cats_sorted":["cs.CL","cs.DL"],"title_canon_sha256":"6dffb40af2ffb54e6145722ec02a6e4c85e9ec8a096db534b393ec2135e3c489","abstract_canon_sha256":"9936eb70f36e5224aefd6eb44e558987e7422f1925a7dbec4b594e872f94f6d9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:48:35.012680Z","signature_b64":"fILm0PmWMQBCCOlI84KPibnjrMprckoJwO0/U35uOozWGwNOVk8AOp6ZIBh+23sgEaRB1AVpejaceGXIqWHzBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fab6c1042516301f5b3c90c0a61d98a0c16a66fc91c23c8cace42787284b29b5","last_reissued_at":"2026-05-18T03:48:35.012142Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:48:35.012142Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Leveraging Subjective Human Annotation for Clustering Historic Newspaper Articles","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.DL"],"primary_cat":"cs.IR","authors_text":"Austin Lee, Axinia Radeva, Barbara Taranto, Boyi Xie, Deepak Shankargouda, Haimonti Dutta, Kyle Rego, Manoj Pooleery, Rebecca Passonneau, William Chan","submitted_at":"2012-08-17T04:48:58Z","abstract_excerpt":"The New York Public Library is participating in the Chronicling America initiative to develop an online searchable database of historically significant newspaper articles. Microfilm copies of the newspapers are scanned and high resolution Optical Character Recognition (OCR) software is run on them. The text from the OCR provides a wealth of data and opinion for researchers and historians. However, categorization of articles provided by the OCR engine is rudimentary and a large number of the articles are labeled editorial without further grouping. Manually sorting articles into fine-grained cat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1208.3530","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1208.3530","created_at":"2026-05-18T03:48:35.012209+00:00"},{"alias_kind":"arxiv_version","alias_value":"1208.3530v1","created_at":"2026-05-18T03:48:35.012209+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1208.3530","created_at":"2026-05-18T03:48:35.012209+00:00"},{"alias_kind":"pith_short_12","alias_value":"7K3MCBBFCYYB","created_at":"2026-05-18T12:26:58.693483+00:00"},{"alias_kind":"pith_short_16","alias_value":"7K3MCBBFCYYB6WZ4","created_at":"2026-05-18T12:26:58.693483+00:00"},{"alias_kind":"pith_short_8","alias_value":"7K3MCBBF","created_at":"2026-05-18T12:26:58.693483+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD","json":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD.json","graph_json":"https://pith.science/api/pith-number/7K3MCBBFCYYB6WZ4SDAKMHMYUD/graph.json","events_json":"https://pith.science/api/pith-number/7K3MCBBFCYYB6WZ4SDAKMHMYUD/events.json","paper":"https://pith.science/paper/7K3MCBBF"},"agent_actions":{"view_html":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD","download_json":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD.json","view_paper":"https://pith.science/paper/7K3MCBBF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1208.3530&json=true","fetch_graph":"https://pith.science/api/pith-number/7K3MCBBFCYYB6WZ4SDAKMHMYUD/graph.json","fetch_events":"https://pith.science/api/pith-number/7K3MCBBFCYYB6WZ4SDAKMHMYUD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD/action/storage_attestation","attest_author":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD/action/author_attestation","sign_citation":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD/action/citation_signature","submit_replication":"https://pith.science/pith/7K3MCBBFCYYB6WZ4SDAKMHMYUD/action/replication_record"}},"created_at":"2026-05-18T03:48:35.012209+00:00","updated_at":"2026-05-18T03:48:35.012209+00:00"}