{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:FZVFMSKND7UKID5BII7XQMCBDB","short_pith_number":"pith:FZVFMSKN","schema_version":"1.0","canonical_sha256":"2e6a56494d1fe8a40fa1423f783041187a1f590299749c46f58a4964c0b0576c","source":{"kind":"arxiv","id":"1712.09913","version":3},"attestation_state":"computed","paper":{"title":"Visualizing the Loss Landscape of Neural Nets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Christoph Studer, Gavin Taylor, Hao Li, Tom Goldstein, Zheng Xu","submitted_at":"2017-12-28T16:15:42Z","abstract_excerpt":"Neural network training relies on our ability to find \"good\" minimizers of highly non-convex loss functions. It is well-known that certain network architecture designs (e.g., skip connections) produce loss functions that train easier, and well-chosen training parameters (batch size, learning rate, optimizer) produce minimizers that generalize better. However, the reasons for these differences, and their effects on the underlying loss landscape, are not well understood. In this paper, we explore the structure of neural loss functions, and the effect of loss landscapes on generalization, using a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.09913","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-12-28T16:15:42Z","cross_cats_sorted":["cs.CV","stat.ML"],"title_canon_sha256":"6bf95bbb7f33cc1bb51295c218f9a7172708c71d094645cf081a282651cad042","abstract_canon_sha256":"303541e3fa4f9b129acf43d75454eb48f11fb05199781456890d88f7261ecbcf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:23.493047Z","signature_b64":"iDfuAGUaBptVEtc846Hul6Dbc4OikZr8YgpogF8lnNiPFVHL2wKbOI8tTMt5pHhkDCbGz6/BOarIZtHdcF+FBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2e6a56494d1fe8a40fa1423f783041187a1f590299749c46f58a4964c0b0576c","last_reissued_at":"2026-05-18T00:01:23.492664Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:23.492664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Visualizing the Loss Landscape of Neural Nets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Christoph Studer, Gavin Taylor, Hao Li, Tom Goldstein, Zheng Xu","submitted_at":"2017-12-28T16:15:42Z","abstract_excerpt":"Neural network training relies on our ability to find \"good\" minimizers of highly non-convex loss functions. It is well-known that certain network architecture designs (e.g., skip connections) produce loss functions that train easier, and well-chosen training parameters (batch size, learning rate, optimizer) produce minimizers that generalize better. However, the reasons for these differences, and their effects on the underlying loss landscape, are not well understood. In this paper, we explore the structure of neural loss functions, and the effect of loss landscapes on generalization, using a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.09913","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.09913","created_at":"2026-05-18T00:01:23.492720+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.09913v3","created_at":"2026-05-18T00:01:23.492720+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.09913","created_at":"2026-05-18T00:01:23.492720+00:00"},{"alias_kind":"pith_short_12","alias_value":"FZVFMSKND7UK","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_16","alias_value":"FZVFMSKND7UKID5B","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_8","alias_value":"FZVFMSKN","created_at":"2026-05-18T12:31:15.632608+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"1906.10822","citing_title":"Gradient Noise Convolution (GNC): Smoothing Loss Function for Distributed Large-Batch SGD","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2212.08989","citing_title":"Deep learning applied to computational mechanics: A comprehensive review, state of the art, and the classics","ref_index":134,"is_internal_anchor":true},{"citing_arxiv_id":"2303.14511","citing_title":"Improving robustness of jet tagging algorithms with adversarial training: exploring the loss surface","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2511.17378","citing_title":"A Unified Stability Analysis of SAM vs SGD: Role of Data Coherence and Emergence of Simplicity Bias","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2010.01412","citing_title":"Sharpness-Aware Minimization for Efficiently Improving Generalization","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2603.21599","citing_title":"Conditional Wasserstein GAN for Simulating Neutrino Event Summaries using Incident Energy of Electron Neutrinos","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13434","citing_title":"Rescaled Asynchronous SGD: Optimal Distributed Optimization under Data and System Heterogeneity","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2212.04089","citing_title":"Editing Models with Task Arithmetic","ref_index":57,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB","json":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB.json","graph_json":"https://pith.science/api/pith-number/FZVFMSKND7UKID5BII7XQMCBDB/graph.json","events_json":"https://pith.science/api/pith-number/FZVFMSKND7UKID5BII7XQMCBDB/events.json","paper":"https://pith.science/paper/FZVFMSKN"},"agent_actions":{"view_html":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB","download_json":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB.json","view_paper":"https://pith.science/paper/FZVFMSKN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.09913&json=true","fetch_graph":"https://pith.science/api/pith-number/FZVFMSKND7UKID5BII7XQMCBDB/graph.json","fetch_events":"https://pith.science/api/pith-number/FZVFMSKND7UKID5BII7XQMCBDB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB/action/storage_attestation","attest_author":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB/action/author_attestation","sign_citation":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB/action/citation_signature","submit_replication":"https://pith.science/pith/FZVFMSKND7UKID5BII7XQMCBDB/action/replication_record"}},"created_at":"2026-05-18T00:01:23.492720+00:00","updated_at":"2026-05-18T00:01:23.492720+00:00"}