{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:ISIHVMLQ6D4KB74MW2YJZSMSGJ","short_pith_number":"pith:ISIHVMLQ","schema_version":"1.0","canonical_sha256":"44907ab170f0f8a0ff8cb6b09cc992324ea4cc16cafa71dc39e9f7163fc60392","source":{"kind":"arxiv","id":"1901.10539","version":1},"attestation_state":"computed","paper":{"title":"Impact of Data Pruning on Machine Learning Algorithm Performance","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Aneek Barman Roy, Arun Thundyill Saseendran, Debrup Chakraborty, Lovish Setia, Viren Chhabria","submitted_at":"2019-01-11T19:32:09Z","abstract_excerpt":"Dataset pruning is the process of removing sub-optimal tuples from a dataset to improve the learning of a machine learning model. In this paper, we compared the performance of different algorithms, first on an unpruned dataset and then on an iteratively pruned dataset. The goal was to understand whether an algorithm (say A) on an unpruned dataset performs better than another algorithm (say B), will algorithm B perform better on the pruned data or vice-versa. The dataset chosen for our analysis is a subset of the largest movie ratings database publicly available on the internet, IMDb [1]. The l"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.10539","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2019-01-11T19:32:09Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"616ed2d843bc4d5b02986b8692b84e073ed05f06c7af20979cd336da060a9c60","abstract_canon_sha256":"9977381fad6e03e88a16c84568ba0d29e46e6ba76da3c4458fa691ba147fcd23"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:06.714438Z","signature_b64":"m+FNiRpxNw3F2kD2dyaCNBiFNGoBwPAWe0xPXI/hAI3j4rFDYwEdK4JvoYjSvzN3/oSioziahidZxRV+xLxLCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"44907ab170f0f8a0ff8cb6b09cc992324ea4cc16cafa71dc39e9f7163fc60392","last_reissued_at":"2026-05-17T23:55:06.713772Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:06.713772Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Impact of Data Pruning on Machine Learning Algorithm Performance","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Aneek Barman Roy, Arun Thundyill Saseendran, Debrup Chakraborty, Lovish Setia, Viren Chhabria","submitted_at":"2019-01-11T19:32:09Z","abstract_excerpt":"Dataset pruning is the process of removing sub-optimal tuples from a dataset to improve the learning of a machine learning model. In this paper, we compared the performance of different algorithms, first on an unpruned dataset and then on an iteratively pruned dataset. The goal was to understand whether an algorithm (say A) on an unpruned dataset performs better than another algorithm (say B), will algorithm B perform better on the pruned data or vice-versa. The dataset chosen for our analysis is a subset of the largest movie ratings database publicly available on the internet, IMDb [1]. The l"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.10539","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.10539","created_at":"2026-05-17T23:55:06.713881+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.10539v1","created_at":"2026-05-17T23:55:06.713881+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.10539","created_at":"2026-05-17T23:55:06.713881+00:00"},{"alias_kind":"pith_short_12","alias_value":"ISIHVMLQ6D4K","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"ISIHVMLQ6D4KB74M","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"ISIHVMLQ","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ","json":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ.json","graph_json":"https://pith.science/api/pith-number/ISIHVMLQ6D4KB74MW2YJZSMSGJ/graph.json","events_json":"https://pith.science/api/pith-number/ISIHVMLQ6D4KB74MW2YJZSMSGJ/events.json","paper":"https://pith.science/paper/ISIHVMLQ"},"agent_actions":{"view_html":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ","download_json":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ.json","view_paper":"https://pith.science/paper/ISIHVMLQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.10539&json=true","fetch_graph":"https://pith.science/api/pith-number/ISIHVMLQ6D4KB74MW2YJZSMSGJ/graph.json","fetch_events":"https://pith.science/api/pith-number/ISIHVMLQ6D4KB74MW2YJZSMSGJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ/action/storage_attestation","attest_author":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ/action/author_attestation","sign_citation":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ/action/citation_signature","submit_replication":"https://pith.science/pith/ISIHVMLQ6D4KB74MW2YJZSMSGJ/action/replication_record"}},"created_at":"2026-05-17T23:55:06.713881+00:00","updated_at":"2026-05-17T23:55:06.713881+00:00"}