{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:DYDPCOSPPK7X4DQI5EU7F4AIH6","short_pith_number":"pith:DYDPCOSP","canonical_record":{"source":{"id":"1301.0556","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-12T15:55:25Z","cross_cats_sorted":["cs.IR","stat.ML"],"title_canon_sha256":"b56f612a937d02f814cad023e3fe7bb2c3be69a3bacd8351dc9de42224f0214c","abstract_canon_sha256":"7ffc47d618f4964436109ed20f72281a03c61a493bd3b6c496ea9ea032b61229"},"schema_version":"1.0"},"canonical_sha256":"1e06f13a4f7abf7e0e08e929f2f0083fb6561bae30b16cbac3f7fdbae26296ad","source":{"kind":"arxiv","id":"1301.0556","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1301.0556","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"arxiv_version","alias_value":"1301.0556v1","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1301.0556","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"pith_short_12","alias_value":"DYDPCOSPPK7X","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_16","alias_value":"DYDPCOSPPK7X4DQI","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_8","alias_value":"DYDPCOSP","created_at":"2026-05-18T12:27:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:DYDPCOSPPK7X4DQI5EU7F4AIH6","target":"record","payload":{"canonical_record":{"source":{"id":"1301.0556","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-12T15:55:25Z","cross_cats_sorted":["cs.IR","stat.ML"],"title_canon_sha256":"b56f612a937d02f814cad023e3fe7bb2c3be69a3bacd8351dc9de42224f0214c","abstract_canon_sha256":"7ffc47d618f4964436109ed20f72281a03c61a493bd3b6c496ea9ea032b61229"},"schema_version":"1.0"},"canonical_sha256":"1e06f13a4f7abf7e0e08e929f2f0083fb6561bae30b16cbac3f7fdbae26296ad","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:37:18.595092Z","signature_b64":"4n/9tFOV9CnSKPVzoxjN1y7oZYVig6xXvZRVxS8LN9p28WY8415DLoaodccVD0tpceEQj5QVwSs50/htzsZmBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1e06f13a4f7abf7e0e08e929f2f0083fb6561bae30b16cbac3f7fdbae26296ad","last_reissued_at":"2026-05-18T03:37:18.594294Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:37:18.594294Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1301.0556","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:37:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ap/BKpS0c62jOFTuiPWkRrqeIfyxB0YH9wbPFc4D79er0JpnIZMp0h45rmCpQwoDqFkE6yoq+O1SoUY95KfnDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:11:21.562958Z"},"content_sha256":"b4078a5b41246f5b2096d78be1413e4cfeaf8d447ff4281fb811924e3c86c39d","schema_version":"1.0","event_id":"sha256:b4078a5b41246f5b2096d78be1413e4cfeaf8d447ff4281fb811924e3c86c39d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:DYDPCOSPPK7X4DQI5EU7F4AIH6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning with Scope, with Application to Information Extraction and Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","stat.ML"],"primary_cat":"cs.LG","authors_text":"Andrew McCallum, David Blei, J Andrew Bagnell","submitted_at":"2012-12-12T15:55:25Z","abstract_excerpt":"In probabilistic approaches to classification and information extraction, one typically builds a statistical model of words under the assumption that future data will exhibit the same regularities as the training data.  In many data sets, however, there are scope-limited features whose predictive power is only applicable to a certain subset of the data.  For example, in information extraction from web pages, word formatting may be indicative of extraction category in different ways on different web pages.  The difficulty with using such features is capturing and exploiting the new regularities"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1301.0556","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:37:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KEePPPlSgPir/1z7sMluXjUH862Tb9n4PKS+/aPKMY1UWHq5+yLmdInt+DWpzszHLCedj/h0fGPBUsvcQZKDAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T10:11:21.563585Z"},"content_sha256":"de404c7b96d9eb4c0a3250fb40dcc8a88951a59408b3ed8f7166ca6a5abe6a9a","schema_version":"1.0","event_id":"sha256:de404c7b96d9eb4c0a3250fb40dcc8a88951a59408b3ed8f7166ca6a5abe6a9a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/bundle.json","state_url":"https://pith.science/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T10:11:21Z","links":{"resolver":"https://pith.science/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6","bundle":"https://pith.science/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/bundle.json","state":"https://pith.science/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DYDPCOSPPK7X4DQI5EU7F4AIH6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:DYDPCOSPPK7X4DQI5EU7F4AIH6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7ffc47d618f4964436109ed20f72281a03c61a493bd3b6c496ea9ea032b61229","cross_cats_sorted":["cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-12T15:55:25Z","title_canon_sha256":"b56f612a937d02f814cad023e3fe7bb2c3be69a3bacd8351dc9de42224f0214c"},"schema_version":"1.0","source":{"id":"1301.0556","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1301.0556","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"arxiv_version","alias_value":"1301.0556v1","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1301.0556","created_at":"2026-05-18T03:37:18Z"},{"alias_kind":"pith_short_12","alias_value":"DYDPCOSPPK7X","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_16","alias_value":"DYDPCOSPPK7X4DQI","created_at":"2026-05-18T12:27:04Z"},{"alias_kind":"pith_short_8","alias_value":"DYDPCOSP","created_at":"2026-05-18T12:27:04Z"}],"graph_snapshots":[{"event_id":"sha256:de404c7b96d9eb4c0a3250fb40dcc8a88951a59408b3ed8f7166ca6a5abe6a9a","target":"graph","created_at":"2026-05-18T03:37:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In probabilistic approaches to classification and information extraction, one typically builds a statistical model of words under the assumption that future data will exhibit the same regularities as the training data.  In many data sets, however, there are scope-limited features whose predictive power is only applicable to a certain subset of the data.  For example, in information extraction from web pages, word formatting may be indicative of extraction category in different ways on different web pages.  The difficulty with using such features is capturing and exploiting the new regularities","authors_text":"Andrew McCallum, David Blei, J Andrew Bagnell","cross_cats":["cs.IR","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-12T15:55:25Z","title":"Learning with Scope, with Application to Information Extraction and Classification"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1301.0556","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b4078a5b41246f5b2096d78be1413e4cfeaf8d447ff4281fb811924e3c86c39d","target":"record","created_at":"2026-05-18T03:37:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7ffc47d618f4964436109ed20f72281a03c61a493bd3b6c496ea9ea032b61229","cross_cats_sorted":["cs.IR","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-12-12T15:55:25Z","title_canon_sha256":"b56f612a937d02f814cad023e3fe7bb2c3be69a3bacd8351dc9de42224f0214c"},"schema_version":"1.0","source":{"id":"1301.0556","kind":"arxiv","version":1}},"canonical_sha256":"1e06f13a4f7abf7e0e08e929f2f0083fb6561bae30b16cbac3f7fdbae26296ad","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1e06f13a4f7abf7e0e08e929f2f0083fb6561bae30b16cbac3f7fdbae26296ad","first_computed_at":"2026-05-18T03:37:18.594294Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:37:18.594294Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4n/9tFOV9CnSKPVzoxjN1y7oZYVig6xXvZRVxS8LN9p28WY8415DLoaodccVD0tpceEQj5QVwSs50/htzsZmBw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:37:18.595092Z","signed_message":"canonical_sha256_bytes"},"source_id":"1301.0556","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b4078a5b41246f5b2096d78be1413e4cfeaf8d447ff4281fb811924e3c86c39d","sha256:de404c7b96d9eb4c0a3250fb40dcc8a88951a59408b3ed8f7166ca6a5abe6a9a"],"state_sha256":"af3c55fc3ef79126372aa3dc7b12cb3edfa06befc18238c31c1cf29285856c37"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MGE1fS3ojs1dKxR2nT6VkoQlDAlrJzqbwKHPU7IApxK8hrCSgeuHsLKt0SjWVTqkJXMpNpMR9c4jwQmYTA1QCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T10:11:21.566263Z","bundle_sha256":"1afed146ad0e02c3a50a67c1caa960d0527d5ed429f9b4ed21f54071b07430c3"}}