{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:W4YEUTWCV3ZNNQPMTLNQYWGNH4","short_pith_number":"pith:W4YEUTWC","schema_version":"1.0","canonical_sha256":"b7304a4ec2aef2d6c1ec9adb0c58cd3f1158d9589a38b2f5c8c091b5c5c79308","source":{"kind":"arxiv","id":"1706.10239","version":2},"attestation_state":"computed","paper":{"title":"Towards Understanding Generalization of Deep Learning: Perspective of Loss Landscapes","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Lei Wu, Weinan E, Zhanxing Zhu","submitted_at":"2017-06-30T15:30:21Z","abstract_excerpt":"It is widely observed that deep learning models with learned parameters generalize well, even with much more model parameters than the number of training samples. We systematically investigate the underlying reasons why deep neural networks often generalize well, and reveal the difference between the minima (with the same training error) that generalize well and those they don't. We show that it is the characteristics the landscape of the loss function that explains the good generalization capability. For the landscape of loss function for deep networks, the volume of basin of attraction of go"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1706.10239","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/publicdomain/zero/1.0/","primary_cat":"cs.LG","submitted_at":"2017-06-30T15:30:21Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"150ee797a99c40aa1b5a5557488c0a9cfa873c63078e413a76dd3123784b2dc3","abstract_canon_sha256":"ad0dc833fa016e81e7339c74851fe343a71a28c96325db09e614074479c6d9c6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:30.123473Z","signature_b64":"GiS2lOtds/FqwaLenhl68ZmfXryfsCfXiYmh7LvOnMkVAdTyPn9ubpZDx/FKUhPg50GU5t5jkYgsDKTPQLLhDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b7304a4ec2aef2d6c1ec9adb0c58cd3f1158d9589a38b2f5c8c091b5c5c79308","last_reissued_at":"2026-05-18T00:29:30.122734Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:30.122734Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Understanding Generalization of Deep Learning: Perspective of Loss Landscapes","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Lei Wu, Weinan E, Zhanxing Zhu","submitted_at":"2017-06-30T15:30:21Z","abstract_excerpt":"It is widely observed that deep learning models with learned parameters generalize well, even with much more model parameters than the number of training samples. We systematically investigate the underlying reasons why deep neural networks often generalize well, and reveal the difference between the minima (with the same training error) that generalize well and those they don't. We show that it is the characteristics the landscape of the loss function that explains the good generalization capability. For the landscape of loss function for deep networks, the volume of basin of attraction of go"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.10239","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1706.10239","created_at":"2026-05-18T00:29:30.122848+00:00"},{"alias_kind":"arxiv_version","alias_value":"1706.10239v2","created_at":"2026-05-18T00:29:30.122848+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.10239","created_at":"2026-05-18T00:29:30.122848+00:00"},{"alias_kind":"pith_short_12","alias_value":"W4YEUTWCV3ZN","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_16","alias_value":"W4YEUTWCV3ZNNQPM","created_at":"2026-05-18T12:31:53.515858+00:00"},{"alias_kind":"pith_short_8","alias_value":"W4YEUTWC","created_at":"2026-05-18T12:31:53.515858+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.06572","citing_title":"Deep network as memory space: complexity, generalization, disentangled representation and interpretability","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07914","citing_title":"Flatness and Gradient Alignment Are Both Necessary: Spectral-Aware Gradient-Aligned Exploration for Multi-Distribution Learning","ref_index":3,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4","json":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4.json","graph_json":"https://pith.science/api/pith-number/W4YEUTWCV3ZNNQPMTLNQYWGNH4/graph.json","events_json":"https://pith.science/api/pith-number/W4YEUTWCV3ZNNQPMTLNQYWGNH4/events.json","paper":"https://pith.science/paper/W4YEUTWC"},"agent_actions":{"view_html":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4","download_json":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4.json","view_paper":"https://pith.science/paper/W4YEUTWC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1706.10239&json=true","fetch_graph":"https://pith.science/api/pith-number/W4YEUTWCV3ZNNQPMTLNQYWGNH4/graph.json","fetch_events":"https://pith.science/api/pith-number/W4YEUTWCV3ZNNQPMTLNQYWGNH4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4/action/storage_attestation","attest_author":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4/action/author_attestation","sign_citation":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4/action/citation_signature","submit_replication":"https://pith.science/pith/W4YEUTWCV3ZNNQPMTLNQYWGNH4/action/replication_record"}},"created_at":"2026-05-18T00:29:30.122848+00:00","updated_at":"2026-05-18T00:29:30.122848+00:00"}