{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:2XXJU5B4QDFPIDAR3AJQ65OAJU","short_pith_number":"pith:2XXJU5B4","schema_version":"1.0","canonical_sha256":"d5ee9a743c80caf40c11d8130f75c04d2ef0b5b488a70d63c765bb9551656859","source":{"kind":"arxiv","id":"1611.07476","version":2},"attestation_state":"computed","paper":{"title":"Eigenvalues of the Hessian in Deep Learning: Singularity and Beyond","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Leon Bottou, Levent Sagun, Yann LeCun","submitted_at":"2016-11-22T19:24:49Z","abstract_excerpt":"We look at the eigenvalues of the Hessian of a loss function before and after training. The eigenvalue distribution is seen to be composed of two parts, the bulk which is concentrated around zero, and the edges which are scattered away from zero. We present empirical evidence for the bulk indicating how over-parametrized the system is, and for the edges that depend on the input data."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.07476","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-22T19:24:49Z","cross_cats_sorted":[],"title_canon_sha256":"5c1791bbfeab751510dd51d4afe15da8c093122ce0c50fd35a2a75e4304e8ec8","abstract_canon_sha256":"e7e287f7483d648e2ff2780a6ff6c012ff9497404b98f1acf2c41c9cb259d54f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:33:39.506549Z","signature_b64":"8glhxG9puu5swSQQygJoO5eCt+bshuIQ6XHdAom40t9W8VfFh9DqhwNMkH6oa0AdpFrcIROWUDXp9EFYdpLJDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d5ee9a743c80caf40c11d8130f75c04d2ef0b5b488a70d63c765bb9551656859","last_reissued_at":"2026-05-18T00:33:39.505864Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:33:39.505864Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Eigenvalues of the Hessian in Deep Learning: Singularity and Beyond","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Leon Bottou, Levent Sagun, Yann LeCun","submitted_at":"2016-11-22T19:24:49Z","abstract_excerpt":"We look at the eigenvalues of the Hessian of a loss function before and after training. The eigenvalue distribution is seen to be composed of two parts, the bulk which is concentrated around zero, and the edges which are scattered away from zero. We present empirical evidence for the bulk indicating how over-parametrized the system is, and for the edges that depend on the input data."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.07476","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.07476","created_at":"2026-05-18T00:33:39.505960+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.07476v2","created_at":"2026-05-18T00:33:39.505960+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.07476","created_at":"2026-05-18T00:33:39.505960+00:00"},{"alias_kind":"pith_short_12","alias_value":"2XXJU5B4QDFP","created_at":"2026-05-18T12:29:55.572404+00:00"},{"alias_kind":"pith_short_16","alias_value":"2XXJU5B4QDFPIDAR","created_at":"2026-05-18T12:29:55.572404+00:00"},{"alias_kind":"pith_short_8","alias_value":"2XXJU5B4","created_at":"2026-05-18T12:29:55.572404+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":13,"internal_anchor_count":10,"sample":[{"citing_arxiv_id":"1906.09069","citing_title":"First Exit Time Analysis of Stochastic Gradient Descent Under Heavy-Tailed Gradient Noise","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.23087","citing_title":"The Implicit Bias of Depth: From Neural Collapse to Softmax Codes","ref_index":59,"is_internal_anchor":true},{"citing_arxiv_id":"1907.02911","citing_title":"Weight-space symmetry in deep networks gives rise to permutation saddles, connected by equal-loss valleys across the loss landscape","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"1907.10732","citing_title":"Hessian based analysis of SGD for Deep Nets: Dynamics and Generalization","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2502.02345","citing_title":"Low Rank Based Subspace Inference for the Laplace Approximation of Bayesian Neural Networks","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22432","citing_title":"AMUSE: Anytime Muon with Stable Gradient Evaluation","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2602.18584","citing_title":"GIST: Targeted Data Selection for Instruction Tuning via Coupled Optimization Geometry","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2505.23737","citing_title":"On the Convergence Analysis of Muon","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2603.20527","citing_title":"RMNP: Row-Momentum Normalized Preconditioning for Scalable Matrix-Based Optimization","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13214","citing_title":"Backdoor Channels Hidden in Latent Space: Cryptographic Undetectability in Modern Neural Networks","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00650","citing_title":"AdaMeZO: Adam-style Zeroth-Order Optimizer for LLM Fine-tuning Without Maintaining the Moments","ref_index":32,"is_internal_anchor":false},{"citing_arxiv_id":"2604.18970","citing_title":"Mechanistic Anomaly Detection via Functional Attribution","ref_index":57,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10202","citing_title":"Wolkowicz-Styan Upper Bound on the Hessian Eigenspectrum for Cross-Entropy Loss in Nonlinear Smooth Neural Networks","ref_index":47,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU","json":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU.json","graph_json":"https://pith.science/api/pith-number/2XXJU5B4QDFPIDAR3AJQ65OAJU/graph.json","events_json":"https://pith.science/api/pith-number/2XXJU5B4QDFPIDAR3AJQ65OAJU/events.json","paper":"https://pith.science/paper/2XXJU5B4"},"agent_actions":{"view_html":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU","download_json":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU.json","view_paper":"https://pith.science/paper/2XXJU5B4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.07476&json=true","fetch_graph":"https://pith.science/api/pith-number/2XXJU5B4QDFPIDAR3AJQ65OAJU/graph.json","fetch_events":"https://pith.science/api/pith-number/2XXJU5B4QDFPIDAR3AJQ65OAJU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU/action/storage_attestation","attest_author":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU/action/author_attestation","sign_citation":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU/action/citation_signature","submit_replication":"https://pith.science/pith/2XXJU5B4QDFPIDAR3AJQ65OAJU/action/replication_record"}},"created_at":"2026-05-18T00:33:39.505960+00:00","updated_at":"2026-05-18T00:33:39.505960+00:00"}