{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:R3KV3QAS4QJKEGKVXV3S4MEJM2","short_pith_number":"pith:R3KV3QAS","canonical_record":{"source":{"id":"1712.07628","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-20T18:34:08Z","cross_cats_sorted":["math.OC"],"title_canon_sha256":"8289420afd7c9f331074fbeae53c2322559be3bf5d7049c386a51b45c9ddefd0","abstract_canon_sha256":"2b08f046e7acd5bbae3c92f8d779ab1415dd3a25d16d09a78134bc7fd3522732"},"schema_version":"1.0"},"canonical_sha256":"8ed55dc012e412a21955bd772e308966beab5975f43dca78f62010eaae1032dc","source":{"kind":"arxiv","id":"1712.07628","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.07628","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"arxiv_version","alias_value":"1712.07628v1","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.07628","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"pith_short_12","alias_value":"R3KV3QAS4QJK","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"R3KV3QAS4QJKEGKV","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"R3KV3QAS","created_at":"2026-05-18T12:31:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:R3KV3QAS4QJKEGKVXV3S4MEJM2","target":"record","payload":{"canonical_record":{"source":{"id":"1712.07628","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-20T18:34:08Z","cross_cats_sorted":["math.OC"],"title_canon_sha256":"8289420afd7c9f331074fbeae53c2322559be3bf5d7049c386a51b45c9ddefd0","abstract_canon_sha256":"2b08f046e7acd5bbae3c92f8d779ab1415dd3a25d16d09a78134bc7fd3522732"},"schema_version":"1.0"},"canonical_sha256":"8ed55dc012e412a21955bd772e308966beab5975f43dca78f62010eaae1032dc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:27:34.007092Z","signature_b64":"1zreiMU58iX5O/iOZavO55M0TIgl3/kKumiyHxT5nVppN1IN/TIQtY4NJBmSodaU4OPPydr/xHP+jboD/IVaCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8ed55dc012e412a21955bd772e308966beab5975f43dca78f62010eaae1032dc","last_reissued_at":"2026-05-18T00:27:34.006363Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:27:34.006363Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.07628","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hwcSVq4KW9hUhx6CLLuE41Nruj8sIVjcxiKIbZppr7lsqdSvw+x4YOCwreO7yaRLVS1NWQNMEuFfWvJwjReVAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T21:22:40.901086Z"},"content_sha256":"c2863827f17fbc0944b4d6eb9de090d5ac025b13fea9955e358a8c4e8917ed60","schema_version":"1.0","event_id":"sha256:c2863827f17fbc0944b4d6eb9de090d5ac025b13fea9955e358a8c4e8917ed60"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:R3KV3QAS4QJKEGKVXV3S4MEJM2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving Generalization Performance by Switching from Adam to SGD","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["math.OC"],"primary_cat":"cs.LG","authors_text":"Nitish Shirish Keskar, Richard Socher","submitted_at":"2017-12-20T18:34:08Z","abstract_excerpt":"Despite superior training outcomes, adaptive optimization methods such as Adam, Adagrad or RMSprop have been found to generalize poorly compared to Stochastic gradient descent (SGD). These methods tend to perform well in the initial portion of training but are outperformed by SGD at later stages of training. We investigate a hybrid strategy that begins training with an adaptive method and switches to SGD when appropriate. Concretely, we propose SWATS, a simple strategy which switches from Adam to SGD when a triggering condition is satisfied. The condition we propose relates to the projection o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.07628","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cebCU4qa4GllFLvLYXv9I9rp7Uk/7BUR/XMfyD+rXRhpfd1sZ+sWiaYz8pxIABxKxT9wvWuR4wK+LUq7Nf+mDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T21:22:40.901749Z"},"content_sha256":"ef4c3ae29d3b381664eceef0d5493dcea444f0cbecb6de445075edc83897851d","schema_version":"1.0","event_id":"sha256:ef4c3ae29d3b381664eceef0d5493dcea444f0cbecb6de445075edc83897851d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/bundle.json","state_url":"https://pith.science/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T21:22:40Z","links":{"resolver":"https://pith.science/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2","bundle":"https://pith.science/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/bundle.json","state":"https://pith.science/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R3KV3QAS4QJKEGKVXV3S4MEJM2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:R3KV3QAS4QJKEGKVXV3S4MEJM2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2b08f046e7acd5bbae3c92f8d779ab1415dd3a25d16d09a78134bc7fd3522732","cross_cats_sorted":["math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-20T18:34:08Z","title_canon_sha256":"8289420afd7c9f331074fbeae53c2322559be3bf5d7049c386a51b45c9ddefd0"},"schema_version":"1.0","source":{"id":"1712.07628","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.07628","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"arxiv_version","alias_value":"1712.07628v1","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.07628","created_at":"2026-05-18T00:27:34Z"},{"alias_kind":"pith_short_12","alias_value":"R3KV3QAS4QJK","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"R3KV3QAS4QJKEGKV","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"R3KV3QAS","created_at":"2026-05-18T12:31:39Z"}],"graph_snapshots":[{"event_id":"sha256:ef4c3ae29d3b381664eceef0d5493dcea444f0cbecb6de445075edc83897851d","target":"graph","created_at":"2026-05-18T00:27:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Despite superior training outcomes, adaptive optimization methods such as Adam, Adagrad or RMSprop have been found to generalize poorly compared to Stochastic gradient descent (SGD). These methods tend to perform well in the initial portion of training but are outperformed by SGD at later stages of training. We investigate a hybrid strategy that begins training with an adaptive method and switches to SGD when appropriate. Concretely, we propose SWATS, a simple strategy which switches from Adam to SGD when a triggering condition is satisfied. The condition we propose relates to the projection o","authors_text":"Nitish Shirish Keskar, Richard Socher","cross_cats":["math.OC"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-20T18:34:08Z","title":"Improving Generalization Performance by Switching from Adam to SGD"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.07628","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c2863827f17fbc0944b4d6eb9de090d5ac025b13fea9955e358a8c4e8917ed60","target":"record","created_at":"2026-05-18T00:27:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2b08f046e7acd5bbae3c92f8d779ab1415dd3a25d16d09a78134bc7fd3522732","cross_cats_sorted":["math.OC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-20T18:34:08Z","title_canon_sha256":"8289420afd7c9f331074fbeae53c2322559be3bf5d7049c386a51b45c9ddefd0"},"schema_version":"1.0","source":{"id":"1712.07628","kind":"arxiv","version":1}},"canonical_sha256":"8ed55dc012e412a21955bd772e308966beab5975f43dca78f62010eaae1032dc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8ed55dc012e412a21955bd772e308966beab5975f43dca78f62010eaae1032dc","first_computed_at":"2026-05-18T00:27:34.006363Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:27:34.006363Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1zreiMU58iX5O/iOZavO55M0TIgl3/kKumiyHxT5nVppN1IN/TIQtY4NJBmSodaU4OPPydr/xHP+jboD/IVaCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:27:34.007092Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.07628","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c2863827f17fbc0944b4d6eb9de090d5ac025b13fea9955e358a8c4e8917ed60","sha256:ef4c3ae29d3b381664eceef0d5493dcea444f0cbecb6de445075edc83897851d"],"state_sha256":"2928e91f47c3dbca1171a3ea3ac772d4717b7cf19363d3640d361a503d7f48fc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6fefqPOecCbmVIj6sgsuW8PcbNnyrCZjyBqql1CcoD2k6cTdkFMgz9vp5yP1BDg2MP/4Jh0BUeaLlNBfmfIWBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T21:22:40.905487Z","bundle_sha256":"50fe4c28517758cdcd39a3835a4195b03ffac1f28512d8778b31f698e8e858eb"}}