{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:YXUVRP56GGIOSYOZAMYHY4LYHQ","short_pith_number":"pith:YXUVRP56","schema_version":"1.0","canonical_sha256":"c5e958bfbe3190e961d903307c71783c1b809cef5f0ebe3ba556c813bb9b091b","source":{"kind":"arxiv","id":"1610.08123","version":4},"attestation_state":"computed","paper":{"title":"Socratic Learning: Augmenting Generative Models to Incorporate Latent Subsets in Training Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Bryan He, Christopher De Sa, Christopher R\\'e, Dan Iter, Paroma Varma, Peng Xu, Rose Yu","submitted_at":"2016-10-25T23:43:49Z","abstract_excerpt":"A challenge in training discriminative models like neural networks is obtaining enough labeled training data. Recent approaches use generative models to combine weak supervision sources, like user-defined heuristics or knowledge bases, to label training data. Prior work has explored learning accuracies for these sources even without ground truth labels, but they assume that a single accuracy parameter is sufficient to model the behavior of these sources over the entire training set. In particular, they fail to model latent subsets in the training data in which the supervision sources perform d"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1610.08123","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-10-25T23:43:49Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"42a9fdcc2da5fe02efb19662b7ba04ee57b6c72d9dcf80994d973b856802454a","abstract_canon_sha256":"d1516d986f77bb774752c4dd46f7793a938f40022567fe5847bbc53d7c634c08"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:34:10.251907Z","signature_b64":"4osAD3EHdzlraaxgU9CBbrBvmgAPIsGQc9d5YmWMkNwnXmYDTr5hJFYA1X13E+KaaHtV5nBVEuCa95CWvDyRCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c5e958bfbe3190e961d903307c71783c1b809cef5f0ebe3ba556c813bb9b091b","last_reissued_at":"2026-05-18T00:34:10.251193Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:34:10.251193Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Socratic Learning: Augmenting Generative Models to Incorporate Latent Subsets in Training Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Bryan He, Christopher De Sa, Christopher R\\'e, Dan Iter, Paroma Varma, Peng Xu, Rose Yu","submitted_at":"2016-10-25T23:43:49Z","abstract_excerpt":"A challenge in training discriminative models like neural networks is obtaining enough labeled training data. Recent approaches use generative models to combine weak supervision sources, like user-defined heuristics or knowledge bases, to label training data. Prior work has explored learning accuracies for these sources even without ground truth labels, but they assume that a single accuracy parameter is sufficient to model the behavior of these sources over the entire training set. In particular, they fail to model latent subsets in the training data in which the supervision sources perform d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1610.08123","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1610.08123","created_at":"2026-05-18T00:34:10.251323+00:00"},{"alias_kind":"arxiv_version","alias_value":"1610.08123v4","created_at":"2026-05-18T00:34:10.251323+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1610.08123","created_at":"2026-05-18T00:34:10.251323+00:00"},{"alias_kind":"pith_short_12","alias_value":"YXUVRP56GGIO","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_16","alias_value":"YXUVRP56GGIOSYOZ","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_8","alias_value":"YXUVRP56","created_at":"2026-05-18T12:30:53.716459+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2111.02080","citing_title":"An Explanation of In-context Learning as Implicit Bayesian Inference","ref_index":256,"is_internal_anchor":true},{"citing_arxiv_id":"2012.14913","citing_title":"Transformer Feed-Forward Layers Are Key-Value Memories","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"1911.08731","citing_title":"Distributionally Robust Neural Networks for Group Shifts: On the Importance of Regularization for Worst-Case Generalization","ref_index":211,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ","json":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ.json","graph_json":"https://pith.science/api/pith-number/YXUVRP56GGIOSYOZAMYHY4LYHQ/graph.json","events_json":"https://pith.science/api/pith-number/YXUVRP56GGIOSYOZAMYHY4LYHQ/events.json","paper":"https://pith.science/paper/YXUVRP56"},"agent_actions":{"view_html":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ","download_json":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ.json","view_paper":"https://pith.science/paper/YXUVRP56","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1610.08123&json=true","fetch_graph":"https://pith.science/api/pith-number/YXUVRP56GGIOSYOZAMYHY4LYHQ/graph.json","fetch_events":"https://pith.science/api/pith-number/YXUVRP56GGIOSYOZAMYHY4LYHQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ/action/storage_attestation","attest_author":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ/action/author_attestation","sign_citation":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ/action/citation_signature","submit_replication":"https://pith.science/pith/YXUVRP56GGIOSYOZAMYHY4LYHQ/action/replication_record"}},"created_at":"2026-05-18T00:34:10.251323+00:00","updated_at":"2026-05-18T00:34:10.251323+00:00"}