{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:342NOUZLECG3RAI6RYWETCS46W","short_pith_number":"pith:342NOUZL","schema_version":"1.0","canonical_sha256":"df34d7532b208db8811e8e2c498a5cf5858a6f859cd6d6c7353396678cc74797","source":{"kind":"arxiv","id":"1906.04367","version":1},"attestation_state":"computed","paper":{"title":"Evaluation of Seed Set Selection Approaches and Active Learning Strategies in Predictive Coding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Christian J. Mahoney, Haozhen Zhao, Jianping Zhang, Nathaniel Huber-Fliflet, Peter Gronvall, Shi Ye","submitted_at":"2019-06-11T03:17:22Z","abstract_excerpt":"Active learning is a popular methodology in text classification - known in the legal domain as \"predictive coding\" or \"Technology Assisted Review\" or \"TAR\" - due to its potential to minimize the required review effort to build effective classifiers. In this study, we use extensive experimentation to examine the impact of popular seed set selection strategies in active learning, within a predictive coding exercise, and evaluate different active learning strategies against well-researched continuous active learning strategies for the purpose of determining efficient training methods for classify"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.04367","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2019-06-11T03:17:22Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"0cc4a10a52595adcf5949da02d26e5fbd6a0e442ec434c94a8b194e05bf6164f","abstract_canon_sha256":"0764d46187d314607455732b97a2c3c720065afcbacec058701ecee1c9978640"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:39.529811Z","signature_b64":"CrHQJ5MLBuI8C9M6aoBUiivhSBIKWBd6zWvLqP6x2Q6IcXV8SSC8u+vGuFsgoCkbeALbKe8uKgbX7+ibUYe4CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"df34d7532b208db8811e8e2c498a5cf5858a6f859cd6d6c7353396678cc74797","last_reissued_at":"2026-05-17T23:43:39.529084Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:39.529084Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Evaluation of Seed Set Selection Approaches and Active Learning Strategies in Predictive Coding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Christian J. Mahoney, Haozhen Zhao, Jianping Zhang, Nathaniel Huber-Fliflet, Peter Gronvall, Shi Ye","submitted_at":"2019-06-11T03:17:22Z","abstract_excerpt":"Active learning is a popular methodology in text classification - known in the legal domain as \"predictive coding\" or \"Technology Assisted Review\" or \"TAR\" - due to its potential to minimize the required review effort to build effective classifiers. In this study, we use extensive experimentation to examine the impact of popular seed set selection strategies in active learning, within a predictive coding exercise, and evaluate different active learning strategies against well-researched continuous active learning strategies for the purpose of determining efficient training methods for classify"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.04367","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.04367","created_at":"2026-05-17T23:43:39.529193+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.04367v1","created_at":"2026-05-17T23:43:39.529193+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.04367","created_at":"2026-05-17T23:43:39.529193+00:00"},{"alias_kind":"pith_short_12","alias_value":"342NOUZLECG3","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"342NOUZLECG3RAI6","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"342NOUZL","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W","json":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W.json","graph_json":"https://pith.science/api/pith-number/342NOUZLECG3RAI6RYWETCS46W/graph.json","events_json":"https://pith.science/api/pith-number/342NOUZLECG3RAI6RYWETCS46W/events.json","paper":"https://pith.science/paper/342NOUZL"},"agent_actions":{"view_html":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W","download_json":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W.json","view_paper":"https://pith.science/paper/342NOUZL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.04367&json=true","fetch_graph":"https://pith.science/api/pith-number/342NOUZLECG3RAI6RYWETCS46W/graph.json","fetch_events":"https://pith.science/api/pith-number/342NOUZLECG3RAI6RYWETCS46W/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W/action/timestamp_anchor","attest_storage":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W/action/storage_attestation","attest_author":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W/action/author_attestation","sign_citation":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W/action/citation_signature","submit_replication":"https://pith.science/pith/342NOUZLECG3RAI6RYWETCS46W/action/replication_record"}},"created_at":"2026-05-17T23:43:39.529193+00:00","updated_at":"2026-05-17T23:43:39.529193+00:00"}