{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2010:EUKEJHLW32CKLXYC4MTLWEXUMU","short_pith_number":"pith:EUKEJHLW","canonical_record":{"source":{"id":"1005.0104","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-01T20:55:23Z","cross_cats_sorted":[],"title_canon_sha256":"e15dbfe4636eaaa0100c801b2e13d60e23ff92495d0268370f403accc1b61fb2","abstract_canon_sha256":"e8697167374d49b3bbeb60b926f8d660abf2bf5e13f3e5f7607930de15a38987"},"schema_version":"1.0"},"canonical_sha256":"2514449d76de84a5df02e326bb12f4653b9451cdeef52a370d0ae950d2be6530","source":{"kind":"arxiv","id":"1005.0104","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1005.0104","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"arxiv_version","alias_value":"1005.0104v1","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1005.0104","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"pith_short_12","alias_value":"EUKEJHLW32CK","created_at":"2026-05-18T12:26:06Z"},{"alias_kind":"pith_short_16","alias_value":"EUKEJHLW32CKLXYC","created_at":"2026-05-18T12:26:06Z"},{"alias_kind":"pith_short_8","alias_value":"EUKEJHLW","created_at":"2026-05-18T12:26:06Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2010:EUKEJHLW32CKLXYC4MTLWEXUMU","target":"record","payload":{"canonical_record":{"source":{"id":"1005.0104","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-01T20:55:23Z","cross_cats_sorted":[],"title_canon_sha256":"e15dbfe4636eaaa0100c801b2e13d60e23ff92495d0268370f403accc1b61fb2","abstract_canon_sha256":"e8697167374d49b3bbeb60b926f8d660abf2bf5e13f3e5f7607930de15a38987"},"schema_version":"1.0"},"canonical_sha256":"2514449d76de84a5df02e326bb12f4653b9451cdeef52a370d0ae950d2be6530","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:52.210100Z","signature_b64":"gpEkUjsifzvqBVFGZMRPQOM2VI2bbWyD7SJpp6jZxYxguiFEwHh5ghZxgLmFd5kFHgrygZh6cRofIPz45xB/DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2514449d76de84a5df02e326bb12f4653b9451cdeef52a370d0ae950d2be6530","last_reissued_at":"2026-05-18T00:40:52.209523Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:52.209523Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1005.0104","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EIh+jEQPFZrGHgL++peONj9Et5Wk63Y8QHosDIyuqvYwOCLlmEzi9z+geas9OcQYVQ4vIfs0LIyEuAtFIUGYBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:53:37.343114Z"},"content_sha256":"9443734c45c40a19688972db7dffd6b6aadc35ab1dd8879b74eff2cd82a700fa","schema_version":"1.0","event_id":"sha256:9443734c45c40a19688972db7dffd6b6aadc35ab1dd8879b74eff2cd82a700fa"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2010:EUKEJHLW32CKLXYC4MTLWEXUMU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Joint Structured Models for Extraction from Overlapping Sources","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Rahul Gupta, Sunita Sarawagi","submitted_at":"2010-05-01T20:55:23Z","abstract_excerpt":"We consider the problem of jointly training structured models for extraction from sources whose instances enjoy partial overlap.  This has important applications like user-driven ad-hoc information extraction on the web. Such applications present new challenges in terms of the number of sources and their arbitrary pattern of overlap not seen by earlier collective training schemes applied on two sources. We present an agreement-based learning framework and alternatives within it to trade-off tractability, robustness to noise, and extent of agreement.  We provide a principled scheme to discover "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1005.0104","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JItWbtQrj3Hig+TomN1rmrgdmg12HqVRRMjz9uwuOOHqmqrjYEFCC23Ev9xzgliaHWM4XuxhWojWEe0Ntxc4Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:53:37.343794Z"},"content_sha256":"1538b919a3837e0886b1c9d218b7dd874c56df8192f23f883ad000dac4b3265d","schema_version":"1.0","event_id":"sha256:1538b919a3837e0886b1c9d218b7dd874c56df8192f23f883ad000dac4b3265d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/bundle.json","state_url":"https://pith.science/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T19:53:37Z","links":{"resolver":"https://pith.science/pith/EUKEJHLW32CKLXYC4MTLWEXUMU","bundle":"https://pith.science/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/bundle.json","state":"https://pith.science/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EUKEJHLW32CKLXYC4MTLWEXUMU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2010:EUKEJHLW32CKLXYC4MTLWEXUMU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e8697167374d49b3bbeb60b926f8d660abf2bf5e13f3e5f7607930de15a38987","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-01T20:55:23Z","title_canon_sha256":"e15dbfe4636eaaa0100c801b2e13d60e23ff92495d0268370f403accc1b61fb2"},"schema_version":"1.0","source":{"id":"1005.0104","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1005.0104","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"arxiv_version","alias_value":"1005.0104v1","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1005.0104","created_at":"2026-05-18T00:40:52Z"},{"alias_kind":"pith_short_12","alias_value":"EUKEJHLW32CK","created_at":"2026-05-18T12:26:06Z"},{"alias_kind":"pith_short_16","alias_value":"EUKEJHLW32CKLXYC","created_at":"2026-05-18T12:26:06Z"},{"alias_kind":"pith_short_8","alias_value":"EUKEJHLW","created_at":"2026-05-18T12:26:06Z"}],"graph_snapshots":[{"event_id":"sha256:1538b919a3837e0886b1c9d218b7dd874c56df8192f23f883ad000dac4b3265d","target":"graph","created_at":"2026-05-18T00:40:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We consider the problem of jointly training structured models for extraction from sources whose instances enjoy partial overlap.  This has important applications like user-driven ad-hoc information extraction on the web. Such applications present new challenges in terms of the number of sources and their arbitrary pattern of overlap not seen by earlier collective training schemes applied on two sources. We present an agreement-based learning framework and alternatives within it to trade-off tractability, robustness to noise, and extent of agreement.  We provide a principled scheme to discover ","authors_text":"Rahul Gupta, Sunita Sarawagi","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-01T20:55:23Z","title":"Joint Structured Models for Extraction from Overlapping Sources"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1005.0104","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9443734c45c40a19688972db7dffd6b6aadc35ab1dd8879b74eff2cd82a700fa","target":"record","created_at":"2026-05-18T00:40:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e8697167374d49b3bbeb60b926f8d660abf2bf5e13f3e5f7607930de15a38987","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-05-01T20:55:23Z","title_canon_sha256":"e15dbfe4636eaaa0100c801b2e13d60e23ff92495d0268370f403accc1b61fb2"},"schema_version":"1.0","source":{"id":"1005.0104","kind":"arxiv","version":1}},"canonical_sha256":"2514449d76de84a5df02e326bb12f4653b9451cdeef52a370d0ae950d2be6530","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2514449d76de84a5df02e326bb12f4653b9451cdeef52a370d0ae950d2be6530","first_computed_at":"2026-05-18T00:40:52.209523Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:40:52.209523Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gpEkUjsifzvqBVFGZMRPQOM2VI2bbWyD7SJpp6jZxYxguiFEwHh5ghZxgLmFd5kFHgrygZh6cRofIPz45xB/DQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:40:52.210100Z","signed_message":"canonical_sha256_bytes"},"source_id":"1005.0104","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9443734c45c40a19688972db7dffd6b6aadc35ab1dd8879b74eff2cd82a700fa","sha256:1538b919a3837e0886b1c9d218b7dd874c56df8192f23f883ad000dac4b3265d"],"state_sha256":"49f7fbfd1e19c4454fb204600b8c5cfd8c50c0b7801ebaa01ee6e8cba010f557"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3TjnIyZamuvWHyWiW8rew2ZpIWUZcFMLhFugzM1ayyoDIfqOiGt5JgKmchESVW3jzafgkEidkP9IQKowjuklCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T19:53:37.347515Z","bundle_sha256":"1e92de7565c9e3b8ce15d4e5f2c72d6b074328fc0fd57e30c8420c15751a9c9f"}}