{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:IFD2A3SWPLLIXYYCGCT6E2HKW2","short_pith_number":"pith:IFD2A3SW","canonical_record":{"source":{"id":"1803.06084","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"6f591c5dfee45aeab8027e83765505702e452f0c5cd3d66027f3e5922a98dd21","abstract_canon_sha256":"3df67073370c287fec7257e4393ce9c0e04cba8dfdeebe24311be4a3579560fe"},"schema_version":"1.0"},"canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","source":{"kind":"arxiv","id":"1803.06084","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.06084","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"arxiv_version","alias_value":"1803.06084v2","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.06084","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"pith_short_12","alias_value":"IFD2A3SWPLLI","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"IFD2A3SWPLLIXYYC","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"IFD2A3SW","created_at":"2026-05-18T12:32:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:IFD2A3SWPLLIXYYCGCT6E2HKW2","target":"record","payload":{"canonical_record":{"source":{"id":"1803.06084","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"6f591c5dfee45aeab8027e83765505702e452f0c5cd3d66027f3e5922a98dd21","abstract_canon_sha256":"3df67073370c287fec7257e4393ce9c0e04cba8dfdeebe24311be4a3579560fe"},"schema_version":"1.0"},"canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:49.302836Z","signature_b64":"ot7jtAcHzfoLrBQ8DP47w5/GU/KJlRERiaP9vQzeE6wjDkpIpSUe+geeZU5m6N6cfKUMTVRnLDcoezg+JTIMAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","last_reissued_at":"2026-05-17T23:50:49.302194Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:49.302194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.06084","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4W4qqJOGh650Sh45ZkFeDnTwquq2O7cP4XnNCxg9wkoey5FZ+U/WJtw6Fr3AAg8VOLbN6q4e5aZhg/Kyby9CCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T22:54:52.047249Z"},"content_sha256":"e99e97aa166d47257f7c0a5b170c6327973a6fc7d0e3fd8689c5f53bb42fb804","schema_version":"1.0","event_id":"sha256:e99e97aa166d47257f7c0a5b170c6327973a6fc7d0e3fd8689c5f53bb42fb804"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:IFD2A3SWPLLIXYYCGCT6E2HKW2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Kernel Theory of Modern Data Augmentation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Albert Gu, Alexander J. Ratner, Christopher De Sa, Christopher R\\'e, Tri Dao, Virginia Smith","submitted_at":"2018-03-16T06:05:32Z","abstract_excerpt":"Data augmentation, a technique in which a training set is expanded with class-preserving transformations, is ubiquitous in modern machine learning pipelines. In this paper, we seek to establish a theoretical framework for understanding data augmentation. We approach this from two directions: First, we provide a general model of augmentation as a Markov process, and show that kernels appear naturally with respect to this model, even when we do not employ kernel classification. Next, we analyze more directly the effect of augmentation on kernel classifiers, showing that data augmentation can be "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.06084","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:50:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qxam3yEecryl3/Y47ZnCK+1GXngjiUTpzsICnq8SEgY+Ze7ASIwpEYBR9vdwtVA8NhDv+DStaTkPJX2+3Pc0Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T22:54:52.047899Z"},"content_sha256":"cd05f9379a30be9d4b9a6e6d3888e3cecd6d28e3a83e309736acf4f68937a07e","schema_version":"1.0","event_id":"sha256:cd05f9379a30be9d4b9a6e6d3888e3cecd6d28e3a83e309736acf4f68937a07e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/bundle.json","state_url":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T22:54:52Z","links":{"resolver":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2","bundle":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/bundle.json","state":"https://pith.science/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IFD2A3SWPLLIXYYCGCT6E2HKW2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:IFD2A3SWPLLIXYYCGCT6E2HKW2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3df67073370c287fec7257e4393ce9c0e04cba8dfdeebe24311be4a3579560fe","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","title_canon_sha256":"6f591c5dfee45aeab8027e83765505702e452f0c5cd3d66027f3e5922a98dd21"},"schema_version":"1.0","source":{"id":"1803.06084","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.06084","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"arxiv_version","alias_value":"1803.06084v2","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.06084","created_at":"2026-05-17T23:50:49Z"},{"alias_kind":"pith_short_12","alias_value":"IFD2A3SWPLLI","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"IFD2A3SWPLLIXYYC","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"IFD2A3SW","created_at":"2026-05-18T12:32:28Z"}],"graph_snapshots":[{"event_id":"sha256:cd05f9379a30be9d4b9a6e6d3888e3cecd6d28e3a83e309736acf4f68937a07e","target":"graph","created_at":"2026-05-17T23:50:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Data augmentation, a technique in which a training set is expanded with class-preserving transformations, is ubiquitous in modern machine learning pipelines. In this paper, we seek to establish a theoretical framework for understanding data augmentation. We approach this from two directions: First, we provide a general model of augmentation as a Markov process, and show that kernels appear naturally with respect to this model, even when we do not employ kernel classification. Next, we analyze more directly the effect of augmentation on kernel classifiers, showing that data augmentation can be ","authors_text":"Albert Gu, Alexander J. Ratner, Christopher De Sa, Christopher R\\'e, Tri Dao, Virginia Smith","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","title":"A Kernel Theory of Modern Data Augmentation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.06084","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e99e97aa166d47257f7c0a5b170c6327973a6fc7d0e3fd8689c5f53bb42fb804","target":"record","created_at":"2026-05-17T23:50:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3df67073370c287fec7257e4393ce9c0e04cba8dfdeebe24311be4a3579560fe","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-16T06:05:32Z","title_canon_sha256":"6f591c5dfee45aeab8027e83765505702e452f0c5cd3d66027f3e5922a98dd21"},"schema_version":"1.0","source":{"id":"1803.06084","kind":"arxiv","version":2}},"canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4147a06e567ad68be30230a7e268eab689a4483a07c17477e7884f5dd7c2d9ba","first_computed_at":"2026-05-17T23:50:49.302194Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:50:49.302194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ot7jtAcHzfoLrBQ8DP47w5/GU/KJlRERiaP9vQzeE6wjDkpIpSUe+geeZU5m6N6cfKUMTVRnLDcoezg+JTIMAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:50:49.302836Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.06084","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e99e97aa166d47257f7c0a5b170c6327973a6fc7d0e3fd8689c5f53bb42fb804","sha256:cd05f9379a30be9d4b9a6e6d3888e3cecd6d28e3a83e309736acf4f68937a07e"],"state_sha256":"f9f54597ede514136f0719a064c176f5eeae0450480465bab5f531d3da6f0841"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KC45KoNh3/VDjX2m5TBNaMFxHa1puZhHayO4ImU+GujSv5u7AL3iUHq2BlAp0AV+kcCAV/WDmCPHfvuBqjBhDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T22:54:52.051372Z","bundle_sha256":"21b7f7515c8193fb77cd776e9daf324e59f9c73b56c9acb67339634e8e35d86b"}}