{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:YYZI34UGWIUCDUWWB2UQFX5FIH","short_pith_number":"pith:YYZI34UG","schema_version":"1.0","canonical_sha256":"c6328df286b22821d2d60ea902dfa541fd630f5cf05290424a56e1d8065e1ab9","source":{"kind":"arxiv","id":"1812.10564","version":1},"attestation_state":"computed","paper":{"title":"BlinkML: Efficient Maximum Likelihood Estimation with Probabilistic Guarantees","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","stat.ML"],"primary_cat":"cs.LG","authors_text":"Barzan Mozafari, Jingyi Qing, Xiaoyang Shen, Yongjoo Park","submitted_at":"2018-12-26T22:35:21Z","abstract_excerpt":"The rising volume of datasets has made training machine learning (ML) models a major computational cost in the enterprise. Given the iterative nature of model and parameter tuning, many analysts use a small sample of their entire data during their initial stage of analysis to make quick decisions (e.g., what features or hyperparameters to use) and use the entire dataset only in later stages (i.e., when they have converged to a specific model). This sampling, however, is performed in an ad-hoc fashion. Most practitioners cannot precisely capture the effect of sampling on the quality of their mo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1812.10564","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-12-26T22:35:21Z","cross_cats_sorted":["cs.DB","stat.ML"],"title_canon_sha256":"8b90da3d9d1b78c112ca36ebb96a8bb98e0915e9d4c2f0b05ec792f13adf3d78","abstract_canon_sha256":"eb644fd7ce1d8868fd4782106fc88e40727ff5ff0c4225cfcbcaba236ba876cc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:20.658719Z","signature_b64":"slaNauTSeFAtheN9yrwf4vwD3pYHC2HNtmqkQLH/rTJQKRR8WQmS1PrUHUKH3XW+ob0z2a2OvbW90uY0z3XeDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c6328df286b22821d2d60ea902dfa541fd630f5cf05290424a56e1d8065e1ab9","last_reissued_at":"2026-05-17T23:57:20.658083Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:20.658083Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"BlinkML: Efficient Maximum Likelihood Estimation with Probabilistic Guarantees","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","stat.ML"],"primary_cat":"cs.LG","authors_text":"Barzan Mozafari, Jingyi Qing, Xiaoyang Shen, Yongjoo Park","submitted_at":"2018-12-26T22:35:21Z","abstract_excerpt":"The rising volume of datasets has made training machine learning (ML) models a major computational cost in the enterprise. Given the iterative nature of model and parameter tuning, many analysts use a small sample of their entire data during their initial stage of analysis to make quick decisions (e.g., what features or hyperparameters to use) and use the entire dataset only in later stages (i.e., when they have converged to a specific model). This sampling, however, is performed in an ad-hoc fashion. Most practitioners cannot precisely capture the effect of sampling on the quality of their mo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1812.10564","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1812.10564","created_at":"2026-05-17T23:57:20.658165+00:00"},{"alias_kind":"arxiv_version","alias_value":"1812.10564v1","created_at":"2026-05-17T23:57:20.658165+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1812.10564","created_at":"2026-05-17T23:57:20.658165+00:00"},{"alias_kind":"pith_short_12","alias_value":"YYZI34UGWIUC","created_at":"2026-05-18T12:33:04.347982+00:00"},{"alias_kind":"pith_short_16","alias_value":"YYZI34UGWIUCDUWW","created_at":"2026-05-18T12:33:04.347982+00:00"},{"alias_kind":"pith_short_8","alias_value":"YYZI34UG","created_at":"2026-05-18T12:33:04.347982+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH","json":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH.json","graph_json":"https://pith.science/api/pith-number/YYZI34UGWIUCDUWWB2UQFX5FIH/graph.json","events_json":"https://pith.science/api/pith-number/YYZI34UGWIUCDUWWB2UQFX5FIH/events.json","paper":"https://pith.science/paper/1812.10564"},"agent_actions":{"view_html":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH","download_json":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH.json","view_paper":"https://pith.science/paper/1812.10564","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1812.10564&json=true","fetch_graph":"https://pith.science/api/pith-number/YYZI34UGWIUCDUWWB2UQFX5FIH/graph.json","fetch_events":"https://pith.science/api/pith-number/YYZI34UGWIUCDUWWB2UQFX5FIH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH/action/storage_attestation","attest_author":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH/action/author_attestation","sign_citation":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH/action/citation_signature","submit_replication":"https://pith.science/pith/YYZI34UGWIUCDUWWB2UQFX5FIH/action/replication_record"}},"created_at":"2026-05-17T23:57:20.658165+00:00","updated_at":"2026-05-17T23:57:20.658165+00:00"}