{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:IFD2A3SWPLLIXYYCGCT6E2HKW2","short_pith_number":"pith:IFD2A3SW","schema_version":"1.0","canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","source":{"kind":"arxiv","id":"1803.06084","version":2},"attestation_state":"computed","paper":{"title":"A Kernel Theory of Modern Data Augmentation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Albert Gu, Alexander J. Ratner, Christopher De Sa, Christopher R\\'e, Tri Dao, Virginia Smith","submitted_at":"2018-03-16T06:05:32Z","abstract_excerpt":"Data augmentation, a technique in which a training set is expanded with class-preserving transformations, is ubiquitous in modern machine learning pipelines. In this paper, we seek to establish a theoretical framework for understanding data augmentation. We approach this from two directions: First, we provide a general model of augmentation as a Markov process, and show that kernels appear naturally with respect to this model, even when we do not employ kernel classification. Next, we analyze more directly the effect of augmentation on kernel classifiers, showing that data augmentation can be "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.06084","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"6f591c5dfee45aeab8027e83765505702e452f0c5cd3d66027f3e5922a98dd21","abstract_canon_sha256":"3df67073370c287fec7257e4393ce9c0e04cba8dfdeebe24311be4a3579560fe"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:49.302836Z","signature_b64":"ot7jtAcHzfoLrBQ8DP47w5/GU/KJlRERiaP9vQzeE6wjDkpIpSUe+geeZU5m6N6cfKUMTVRnLDcoezg+JTIMAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","last_reissued_at":"2026-05-17T23:50:49.302194Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:49.302194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Kernel Theory of Modern Data Augmentation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Albert Gu, Alexander J. Ratner, Christopher De Sa, Christopher R\\'e, Tri Dao, Virginia Smith","submitted_at":"2018-03-16T06:05:32Z","abstract_excerpt":"Data augmentation, a technique in which a training set is expanded with class-preserving transformations, is ubiquitous in modern machine learning pipelines. In this paper, we seek to establish a theoretical framework for understanding data augmentation. We approach this from two directions: First, we provide a general model of augmentation as a Markov process, and show that kernels appear naturally with respect to this model, even when we do not employ kernel classification. Next, we analyze more directly the effect of augmentation on kernel classifiers, showing that data augmentation can be "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.06084","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.06084","created_at":"2026-05-17T23:50:49.302281+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.06084v2","created_at":"2026-05-17T23:50:49.302281+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.06084","created_at":"2026-05-17T23:50:49.302281+00:00"},{"alias_kind":"pith_short_12","alias_value":"IFD2A3SWPLLI","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"IFD2A3SWPLLIXYYC","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"IFD2A3SW","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.15306","citing_title":"How Data Augmentation Shapes Neural Representations","ref_index":3,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2","json":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2.json","graph_json":"https://pith.science/api/pith-number/IFD2A3SWPLLIXYYCGCT6E2HKW2/graph.json","events_json":"https://pith.science/api/pith-number/IFD2A3SWPLLIXYYCGCT6E2HKW2/events.json","paper":"https://pith.science/paper/IFD2A3SW"},"agent_actions":{"view_html":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2","download_json":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2.json","view_paper":"https://pith.science/paper/IFD2A3SW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.06084&json=true","fetch_graph":"https://pith.science/api/pith-number/IFD2A3SWPLLIXYYCGCT6E2HKW2/graph.json","fetch_events":"https://pith.science/api/pith-number/IFD2A3SWPLLIXYYCGCT6E2HKW2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/action/storage_attestation","attest_author":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/action/author_attestation","sign_citation":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/action/citation_signature","submit_replication":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/action/replication_record"}},"created_at":"2026-05-17T23:50:49.302281+00:00","updated_at":"2026-05-17T23:50:49.302281+00:00"}