{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:URGMK7VHZLL6KCB244IWCMINEU","short_pith_number":"pith:URGMK7VH","canonical_record":{"source":{"id":"1804.02525","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SI","submitted_at":"2018-04-07T07:50:50Z","cross_cats_sorted":["cs.CL","cs.IR"],"title_canon_sha256":"cd56d42a19a802510ada4a579b81bd0797c0238d9e4b3b00d3abd30fad9f56f0","abstract_canon_sha256":"8e35c68004cc4d207fc10d6c82786f8c0f8c17842c41f5a85b546768cc0dba64"},"schema_version":"1.0"},"canonical_sha256":"a44cc57ea7cad7e5083ae71161310d253be49df221681edc8973dd6736942eea","source":{"kind":"arxiv","id":"1804.02525","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.02525","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"arxiv_version","alias_value":"1804.02525v1","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.02525","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"pith_short_12","alias_value":"URGMK7VHZLL6","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"URGMK7VHZLL6KCB2","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"URGMK7VH","created_at":"2026-05-18T12:32:56Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:URGMK7VHZLL6KCB244IWCMINEU","target":"record","payload":{"canonical_record":{"source":{"id":"1804.02525","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SI","submitted_at":"2018-04-07T07:50:50Z","cross_cats_sorted":["cs.CL","cs.IR"],"title_canon_sha256":"cd56d42a19a802510ada4a579b81bd0797c0238d9e4b3b00d3abd30fad9f56f0","abstract_canon_sha256":"8e35c68004cc4d207fc10d6c82786f8c0f8c17842c41f5a85b546768cc0dba64"},"schema_version":"1.0"},"canonical_sha256":"a44cc57ea7cad7e5083ae71161310d253be49df221681edc8973dd6736942eea","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:18:59.207130Z","signature_b64":"+gCergi7PwaW7Df6vaDYLUMdVuCvT+8YMP4fEAu+I/JwLvxJLwjh/736zX3V5qLhQOfQp9K0HFHa26qFyNImBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a44cc57ea7cad7e5083ae71161310d253be49df221681edc8973dd6736942eea","last_reissued_at":"2026-05-18T00:18:59.206701Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:18:59.206701Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.02525","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dWbyUzjLB2C5hSptouNHCwgj/k7oEiB0M0fClmDwg1oy8e5/41z/Zje6wU/HUmz3m/VOuAhjZo0uaA6jtN12Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T17:47:21.007969Z"},"content_sha256":"77a58e2992273e0f13a21b9c006bcc4be61716b8320f697803ba17b249accdc1","schema_version":"1.0","event_id":"sha256:77a58e2992273e0f13a21b9c006bcc4be61716b8320f697803ba17b249accdc1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:URGMK7VHZLL6KCB244IWCMINEU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Quootstrap: Scalable Unsupervised Extraction of Quotation-Speaker Pairs from Large News Corpora via Bootstrapping","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR"],"primary_cat":"cs.SI","authors_text":"Dario Pavllo, Robert West, Tiziano Piccardi","submitted_at":"2018-04-07T07:50:50Z","abstract_excerpt":"We propose Quootstrap, a method for extracting quotations, as well as the names of the speakers who uttered them, from large news corpora. Whereas prior work has addressed this problem primarily with supervised machine learning, our approach follows a fully unsupervised bootstrapping paradigm. It leverages the redundancy present in large news corpora, more precisely, the fact that the same quotation often appears across multiple news articles in slightly different contexts. Starting from a few seed patterns, such as [\"Q\", said S.], our method extracts a set of quotation-speaker pairs (Q, S), w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.02525","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dpNL5KyH4DqcZYyB7u9FhpOqLnQ0ZD5JqFdtsN2Ox9Y+i/Nd1k981qKngUPLIXg5eAVGoT4dGLgQ/xKCpgRyCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T17:47:21.008729Z"},"content_sha256":"2b0baab8d357d4fc7d3df6d7d5dbac7e15dc5a3cbaee74e9e70ab4514ec0a39f","schema_version":"1.0","event_id":"sha256:2b0baab8d357d4fc7d3df6d7d5dbac7e15dc5a3cbaee74e9e70ab4514ec0a39f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/URGMK7VHZLL6KCB244IWCMINEU/bundle.json","state_url":"https://pith.science/pith/URGMK7VHZLL6KCB244IWCMINEU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/URGMK7VHZLL6KCB244IWCMINEU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T17:47:21Z","links":{"resolver":"https://pith.science/pith/URGMK7VHZLL6KCB244IWCMINEU","bundle":"https://pith.science/pith/URGMK7VHZLL6KCB244IWCMINEU/bundle.json","state":"https://pith.science/pith/URGMK7VHZLL6KCB244IWCMINEU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/URGMK7VHZLL6KCB244IWCMINEU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:URGMK7VHZLL6KCB244IWCMINEU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8e35c68004cc4d207fc10d6c82786f8c0f8c17842c41f5a85b546768cc0dba64","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SI","submitted_at":"2018-04-07T07:50:50Z","title_canon_sha256":"cd56d42a19a802510ada4a579b81bd0797c0238d9e4b3b00d3abd30fad9f56f0"},"schema_version":"1.0","source":{"id":"1804.02525","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.02525","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"arxiv_version","alias_value":"1804.02525v1","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.02525","created_at":"2026-05-18T00:18:59Z"},{"alias_kind":"pith_short_12","alias_value":"URGMK7VHZLL6","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_16","alias_value":"URGMK7VHZLL6KCB2","created_at":"2026-05-18T12:32:56Z"},{"alias_kind":"pith_short_8","alias_value":"URGMK7VH","created_at":"2026-05-18T12:32:56Z"}],"graph_snapshots":[{"event_id":"sha256:2b0baab8d357d4fc7d3df6d7d5dbac7e15dc5a3cbaee74e9e70ab4514ec0a39f","target":"graph","created_at":"2026-05-18T00:18:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose Quootstrap, a method for extracting quotations, as well as the names of the speakers who uttered them, from large news corpora. Whereas prior work has addressed this problem primarily with supervised machine learning, our approach follows a fully unsupervised bootstrapping paradigm. It leverages the redundancy present in large news corpora, more precisely, the fact that the same quotation often appears across multiple news articles in slightly different contexts. Starting from a few seed patterns, such as [\"Q\", said S.], our method extracts a set of quotation-speaker pairs (Q, S), w","authors_text":"Dario Pavllo, Robert West, Tiziano Piccardi","cross_cats":["cs.CL","cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SI","submitted_at":"2018-04-07T07:50:50Z","title":"Quootstrap: Scalable Unsupervised Extraction of Quotation-Speaker Pairs from Large News Corpora via Bootstrapping"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.02525","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:77a58e2992273e0f13a21b9c006bcc4be61716b8320f697803ba17b249accdc1","target":"record","created_at":"2026-05-18T00:18:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8e35c68004cc4d207fc10d6c82786f8c0f8c17842c41f5a85b546768cc0dba64","cross_cats_sorted":["cs.CL","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SI","submitted_at":"2018-04-07T07:50:50Z","title_canon_sha256":"cd56d42a19a802510ada4a579b81bd0797c0238d9e4b3b00d3abd30fad9f56f0"},"schema_version":"1.0","source":{"id":"1804.02525","kind":"arxiv","version":1}},"canonical_sha256":"a44cc57ea7cad7e5083ae71161310d253be49df221681edc8973dd6736942eea","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a44cc57ea7cad7e5083ae71161310d253be49df221681edc8973dd6736942eea","first_computed_at":"2026-05-18T00:18:59.206701Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:18:59.206701Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+gCergi7PwaW7Df6vaDYLUMdVuCvT+8YMP4fEAu+I/JwLvxJLwjh/736zX3V5qLhQOfQp9K0HFHa26qFyNImBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:18:59.207130Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.02525","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:77a58e2992273e0f13a21b9c006bcc4be61716b8320f697803ba17b249accdc1","sha256:2b0baab8d357d4fc7d3df6d7d5dbac7e15dc5a3cbaee74e9e70ab4514ec0a39f"],"state_sha256":"8665cedee3ed07286bd96776059a2e9aeef173b912c201df89dd52285fab05cf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XYa1LthWZVO0QO79awRhf5LXWbdgHr/uOILjGtGaPO+0OhRea9Qoi2Ok3FerWLRV251WKEmXjw4EoFbSGIwHAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T17:47:21.013081Z","bundle_sha256":"6730d0fe481d3f3eb9c2db431fe0eb9b991fc5cc808ba8f07932c606981a76fc"}}