{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:J655K2KRI7DJHF4SOFYZCQRRE5","short_pith_number":"pith:J655K2KR","canonical_record":{"source":{"id":"1904.00962","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-01T16:53:35Z","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"title_canon_sha256":"a5caf378a4c2dd1dd9eac4879099bbb0eb0311ed2a0c2f9f2217d21148ad731e","abstract_canon_sha256":"85b43dc17abea7dc1561a9470fd799224beb337b037b437a99d04548d290b798"},"schema_version":"1.0"},"canonical_sha256":"4fbbd5695147c693979271719142312751c3535b842a03f4c428753a52087fea","source":{"kind":"arxiv","id":"1904.00962","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.00962","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"arxiv_version","alias_value":"1904.00962v5","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.00962","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_12","alias_value":"J655K2KRI7DJ","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_16","alias_value":"J655K2KRI7DJHF4S","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_8","alias_value":"J655K2KR","created_at":"2026-05-21T21:29:48Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:J655K2KRI7DJHF4SOFYZCQRRE5","target":"record","payload":{"canonical_record":{"source":{"id":"1904.00962","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-01T16:53:35Z","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"title_canon_sha256":"a5caf378a4c2dd1dd9eac4879099bbb0eb0311ed2a0c2f9f2217d21148ad731e","abstract_canon_sha256":"85b43dc17abea7dc1561a9470fd799224beb337b037b437a99d04548d290b798"},"schema_version":"1.0"},"canonical_sha256":"4fbbd5695147c693979271719142312751c3535b842a03f4c428753a52087fea","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T21:29:48.722412Z","signature_b64":"45ODB9eYA7bNRGE8R4hsxXAdjIog3sHqwSeuWHnMvjOcbJKDujSvvZAQ4G7MCxFcajuANexXZ55d5WTsgDOpBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4fbbd5695147c693979271719142312751c3535b842a03f4c428753a52087fea","last_reissued_at":"2026-05-21T21:29:48.720470Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T21:29:48.720470Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.00962","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T21:29:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"br4SsCGM0rFSG1Fi3BjsUJBaur14URIdTYmcBbKYF6XVumJN515D9bywzWQlK8IcnucX4hDOe1qRzKjARnC9DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T21:53:44.488291Z"},"content_sha256":"f33578c2f71d45c8f568aeb66dcea701845a3a4c9095b4d336779cbdccc8f3e7","schema_version":"1.0","event_id":"sha256:f33578c2f71d45c8f568aeb66dcea701845a3a4c9095b4d336779cbdccc8f3e7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:J655K2KRI7DJHF4SOFYZCQRRE5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Large Batch Optimization for Deep Learning: Training BERT in 76 minutes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Cho-Jui Hsieh, James Demmel, Jing Li, Jonathan Hseu, Kurt Keutzer, Sanjiv Kumar, Sashank Reddi, Srinadh Bhojanapalli, Xiaodan Song, Yang You","submitted_at":"2019-04-01T16:53:35Z","abstract_excerpt":"Training large deep neural networks on massive datasets is computationally very challenging. There has been recent surge in interest in using large batch stochastic optimization methods to tackle this issue. The most prominent algorithm in this line of research is LARS, which by employing layerwise adaptive learning rates trains ResNet on ImageNet in a few minutes. However, LARS performs poorly for attention models like BERT, indicating that its performance gains are not consistent across tasks. In this paper, we first study a principled layerwise adaptation strategy to accelerate training of "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.00962","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/1904.00962/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T21:29:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8zwt+JB66GPFr57WfzmP0SIF8XI8uwLT7LXTxvqOhz8TZ5ppn/454CiiauNwdqcbs9fag5Y6GDvRsVg/up+GAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T21:53:44.488744Z"},"content_sha256":"4b8053909389e03e001e75b01ad45710e8ab176345d425c75b2ebbca3bcf3913","schema_version":"1.0","event_id":"sha256:4b8053909389e03e001e75b01ad45710e8ab176345d425c75b2ebbca3bcf3913"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/J655K2KRI7DJHF4SOFYZCQRRE5/bundle.json","state_url":"https://pith.science/pith/J655K2KRI7DJHF4SOFYZCQRRE5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/J655K2KRI7DJHF4SOFYZCQRRE5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T21:53:44Z","links":{"resolver":"https://pith.science/pith/J655K2KRI7DJHF4SOFYZCQRRE5","bundle":"https://pith.science/pith/J655K2KRI7DJHF4SOFYZCQRRE5/bundle.json","state":"https://pith.science/pith/J655K2KRI7DJHF4SOFYZCQRRE5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/J655K2KRI7DJHF4SOFYZCQRRE5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:J655K2KRI7DJHF4SOFYZCQRRE5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"85b43dc17abea7dc1561a9470fd799224beb337b037b437a99d04548d290b798","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-01T16:53:35Z","title_canon_sha256":"a5caf378a4c2dd1dd9eac4879099bbb0eb0311ed2a0c2f9f2217d21148ad731e"},"schema_version":"1.0","source":{"id":"1904.00962","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.00962","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"arxiv_version","alias_value":"1904.00962v5","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.00962","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_12","alias_value":"J655K2KRI7DJ","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_16","alias_value":"J655K2KRI7DJHF4S","created_at":"2026-05-21T21:29:48Z"},{"alias_kind":"pith_short_8","alias_value":"J655K2KR","created_at":"2026-05-21T21:29:48Z"}],"graph_snapshots":[{"event_id":"sha256:4b8053909389e03e001e75b01ad45710e8ab176345d425c75b2ebbca3bcf3913","target":"graph","created_at":"2026-05-21T21:29:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/1904.00962/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Training large deep neural networks on massive datasets is computationally very challenging. There has been recent surge in interest in using large batch stochastic optimization methods to tackle this issue. The most prominent algorithm in this line of research is LARS, which by employing layerwise adaptive learning rates trains ResNet on ImageNet in a few minutes. However, LARS performs poorly for attention models like BERT, indicating that its performance gains are not consistent across tasks. In this paper, we first study a principled layerwise adaptation strategy to accelerate training of ","authors_text":"Cho-Jui Hsieh, James Demmel, Jing Li, Jonathan Hseu, Kurt Keutzer, Sanjiv Kumar, Sashank Reddi, Srinadh Bhojanapalli, Xiaodan Song, Yang You","cross_cats":["cs.AI","cs.CL","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-01T16:53:35Z","title":"Large Batch Optimization for Deep Learning: Training BERT in 76 minutes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.00962","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f33578c2f71d45c8f568aeb66dcea701845a3a4c9095b4d336779cbdccc8f3e7","target":"record","created_at":"2026-05-21T21:29:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"85b43dc17abea7dc1561a9470fd799224beb337b037b437a99d04548d290b798","cross_cats_sorted":["cs.AI","cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-01T16:53:35Z","title_canon_sha256":"a5caf378a4c2dd1dd9eac4879099bbb0eb0311ed2a0c2f9f2217d21148ad731e"},"schema_version":"1.0","source":{"id":"1904.00962","kind":"arxiv","version":5}},"canonical_sha256":"4fbbd5695147c693979271719142312751c3535b842a03f4c428753a52087fea","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4fbbd5695147c693979271719142312751c3535b842a03f4c428753a52087fea","first_computed_at":"2026-05-21T21:29:48.720470Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T21:29:48.720470Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"45ODB9eYA7bNRGE8R4hsxXAdjIog3sHqwSeuWHnMvjOcbJKDujSvvZAQ4G7MCxFcajuANexXZ55d5WTsgDOpBw==","signature_status":"signed_v1","signed_at":"2026-05-21T21:29:48.722412Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.00962","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f33578c2f71d45c8f568aeb66dcea701845a3a4c9095b4d336779cbdccc8f3e7","sha256:4b8053909389e03e001e75b01ad45710e8ab176345d425c75b2ebbca3bcf3913"],"state_sha256":"4ed1bb9f4fa9ddbc47da73a861427bf6775c930f877c9f211507e6fe3691ad19"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h3ipIMslIWOKYtZCzWKSOjfv+nRem8gGx0RYTE16vYfdIYOgec4UnTqmzqh7iXZecwGum+QhyYIHsVK+DbpNCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T21:53:44.491559Z","bundle_sha256":"f8abdbbfc5fc1bb47bda048689c3913aa37fc836c4ede9ca5546960956f7cd9e"}}