{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:EDLQMOSMPDKHDGLFGJVDX2JGIA","short_pith_number":"pith:EDLQMOSM","canonical_record":{"source":{"id":"1903.08816","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T03:04:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"acbee489d3adc2d7205bba97d5f7cc1d38e90cc5e61a75a4455bd73894d47efb","abstract_canon_sha256":"cc4093fd860db8ff9dfbab0bdc0414778869ad57ea24158299006ab264e7971e"},"schema_version":"1.0"},"canonical_sha256":"20d7063a4c78d4719965326a3be92640166075c244f06f9d682d3476d54c76f4","source":{"kind":"arxiv","id":"1903.08816","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08816","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08816v1","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08816","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"pith_short_12","alias_value":"EDLQMOSMPDKH","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"EDLQMOSMPDKHDGLF","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"EDLQMOSM","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:EDLQMOSMPDKHDGLFGJVDX2JGIA","target":"record","payload":{"canonical_record":{"source":{"id":"1903.08816","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T03:04:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"acbee489d3adc2d7205bba97d5f7cc1d38e90cc5e61a75a4455bd73894d47efb","abstract_canon_sha256":"cc4093fd860db8ff9dfbab0bdc0414778869ad57ea24158299006ab264e7971e"},"schema_version":"1.0"},"canonical_sha256":"20d7063a4c78d4719965326a3be92640166075c244f06f9d682d3476d54c76f4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:43.849906Z","signature_b64":"QDsBjis0h5BXh79IfWkRDLetGcy4TxKesvPyNhPr0W916l/9AEn7Ml8Yl0/fmwcG2YChunzTIUSz5jCN0Fy0AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"20d7063a4c78d4719965326a3be92640166075c244f06f9d682d3476d54c76f4","last_reissued_at":"2026-05-17T23:50:43.849256Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:43.849256Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1903.08816","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+KH/qBbaGxV8doRJv+MY9pk1RBVPhvI7rqJmb7QqhsFwZ1yzq+ws8QdDvEDX74cl3H5uG31JxEB+YdVorMCrBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T16:10:15.590962Z"},"content_sha256":"7e9550857f450bf9657239b4a3d6299a488e28ee243d96b8a0dd90a0946887ce","schema_version":"1.0","event_id":"sha256:7e9550857f450bf9657239b4a3d6299a488e28ee243d96b8a0dd90a0946887ce"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:EDLQMOSMPDKHDGLFGJVDX2JGIA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Empirical Evaluations of Seed Set Selection Strategies for Predictive Coding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.IR","authors_text":"Christian J. Mahoney, Haozhen Zhao, Katie Jensen, Nathaniel Huber-Fliflet, Robert Neary, Shi Ye","submitted_at":"2019-03-21T03:04:30Z","abstract_excerpt":"Training documents have a significant impact on the performance of predictive models in the legal domain. Yet, there is limited research that explores the effectiveness of the training document selection strategy - in particular, the strategy used to select the seed set, or the set of documents an attorney reviews first to establish an initial model. Since there is limited research on this important component of predictive coding, the authors of this paper set out to identify strategies that consistently perform well. Our research demonstrated that the seed set selection strategy can have a si"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08816","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AR9ZrV2+TsT7GheLldtv4gfMVb7fgAw5BX9n0GsgongJjj47lvNOtY9o8ZOrVT60yC2EUUIvlfzR0XTV4pp/BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T16:10:15.591733Z"},"content_sha256":"cbc223d29e7e415169719e451da9fde253150735c64e5f214c7bf66c67f0b310","schema_version":"1.0","event_id":"sha256:cbc223d29e7e415169719e451da9fde253150735c64e5f214c7bf66c67f0b310"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/bundle.json","state_url":"https://pith.science/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T16:10:15Z","links":{"resolver":"https://pith.science/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA","bundle":"https://pith.science/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/bundle.json","state":"https://pith.science/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EDLQMOSMPDKHDGLFGJVDX2JGIA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:EDLQMOSMPDKHDGLFGJVDX2JGIA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cc4093fd860db8ff9dfbab0bdc0414778869ad57ea24158299006ab264e7971e","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T03:04:30Z","title_canon_sha256":"acbee489d3adc2d7205bba97d5f7cc1d38e90cc5e61a75a4455bd73894d47efb"},"schema_version":"1.0","source":{"id":"1903.08816","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1903.08816","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"arxiv_version","alias_value":"1903.08816v1","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.08816","created_at":"2026-05-17T23:50:43Z"},{"alias_kind":"pith_short_12","alias_value":"EDLQMOSMPDKH","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"EDLQMOSMPDKHDGLF","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"EDLQMOSM","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:cbc223d29e7e415169719e451da9fde253150735c64e5f214c7bf66c67f0b310","target":"graph","created_at":"2026-05-17T23:50:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Training documents have a significant impact on the performance of predictive models in the legal domain. Yet, there is limited research that explores the effectiveness of the training document selection strategy - in particular, the strategy used to select the seed set, or the set of documents an attorney reviews first to establish an initial model. Since there is limited research on this important component of predictive coding, the authors of this paper set out to identify strategies that consistently perform well. Our research demonstrated that the seed set selection strategy can have a si","authors_text":"Christian J. Mahoney, Haozhen Zhao, Katie Jensen, Nathaniel Huber-Fliflet, Robert Neary, Shi Ye","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T03:04:30Z","title":"Empirical Evaluations of Seed Set Selection Strategies for Predictive Coding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.08816","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7e9550857f450bf9657239b4a3d6299a488e28ee243d96b8a0dd90a0946887ce","target":"record","created_at":"2026-05-17T23:50:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cc4093fd860db8ff9dfbab0bdc0414778869ad57ea24158299006ab264e7971e","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-03-21T03:04:30Z","title_canon_sha256":"acbee489d3adc2d7205bba97d5f7cc1d38e90cc5e61a75a4455bd73894d47efb"},"schema_version":"1.0","source":{"id":"1903.08816","kind":"arxiv","version":1}},"canonical_sha256":"20d7063a4c78d4719965326a3be92640166075c244f06f9d682d3476d54c76f4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"20d7063a4c78d4719965326a3be92640166075c244f06f9d682d3476d54c76f4","first_computed_at":"2026-05-17T23:50:43.849256Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:43.849256Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QDsBjis0h5BXh79IfWkRDLetGcy4TxKesvPyNhPr0W916l/9AEn7Ml8Yl0/fmwcG2YChunzTIUSz5jCN0Fy0AA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:43.849906Z","signed_message":"canonical_sha256_bytes"},"source_id":"1903.08816","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7e9550857f450bf9657239b4a3d6299a488e28ee243d96b8a0dd90a0946887ce","sha256:cbc223d29e7e415169719e451da9fde253150735c64e5f214c7bf66c67f0b310"],"state_sha256":"5c939c59fdb65dd9a69d1a4d3622aa638ae7234aa68c231164b7558aa578988f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"N80VLqK5zOb+Rgd94gI+1838M00FAcC3QX4vVe0zjkPtJox+haCLi+kfWRZy9mo2EcyrqJtQ5CCq2xtNsRP2AA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T16:10:15.595749Z","bundle_sha256":"5b947ae9d5e135f9bcd481f941cc9f3d19dbafc9a446798a9f70632904900475"}}