{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:W5B4B4TEPSC2WLXRKBKTJUFNTB","short_pith_number":"pith:W5B4B4TE","schema_version":"1.0","canonical_sha256":"b743c0f2647c85ab2ef1505534d0ad9865203cb2deab6de640be116667d6d1b0","source":{"kind":"arxiv","id":"2602.05600","version":2},"attestation_state":"computed","paper":{"title":"On the Superlinear Relationship between SGD Noise Covariance and Loss Landscape Curvature","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ning Yang, Yikuan Zhang, Yuhai Tu","submitted_at":"2026-02-05T12:35:13Z","abstract_excerpt":"Stochastic Gradient Descent (SGD) introduces anisotropic noise that is correlated with the local curvature of the loss landscape, thereby biasing optimization toward flat minima. Prior work often assumes an equivalence between the Fisher Information Matrix and the Hessian for negative log-likelihood losses, leading to the claim that the SGD noise covariance $\\mathbf{C}$ is proportional to the Hessian $\\mathbf{H}$. We show that this assumption holds only under restrictive conditions that are typically violated in deep neural networks. Using the recently discovered Activity--Weight Duality, we f"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.05600","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-05T12:35:13Z","cross_cats_sorted":[],"title_canon_sha256":"e53ab1b4121358011d9ecfa0671cf002cb747ebf095350de27218238d3da40f9","abstract_canon_sha256":"7230c8318ade56baea0ac0775a75596254273cca980d5b69a291fcf4cc0326c6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:13.932462Z","signature_b64":"XL5wyWUAa0IaJnbseBPz+ZJb2glqC7XETYeyzINYF0LgJ0JFiJAC17klsCWbbjgQheklb/LMwrxE9be44NuBCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b743c0f2647c85ab2ef1505534d0ad9865203cb2deab6de640be116667d6d1b0","last_reissued_at":"2026-06-09T01:05:13.932009Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:13.932009Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On the Superlinear Relationship between SGD Noise Covariance and Loss Landscape Curvature","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ning Yang, Yikuan Zhang, Yuhai Tu","submitted_at":"2026-02-05T12:35:13Z","abstract_excerpt":"Stochastic Gradient Descent (SGD) introduces anisotropic noise that is correlated with the local curvature of the loss landscape, thereby biasing optimization toward flat minima. Prior work often assumes an equivalence between the Fisher Information Matrix and the Hessian for negative log-likelihood losses, leading to the claim that the SGD noise covariance $\\mathbf{C}$ is proportional to the Hessian $\\mathbf{H}$. We show that this assumption holds only under restrictive conditions that are typically violated in deep neural networks. Using the recently discovered Activity--Weight Duality, we f"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.05600","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.05600/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.05600","created_at":"2026-06-09T01:05:13.932068+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.05600v2","created_at":"2026-06-09T01:05:13.932068+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05600","created_at":"2026-06-09T01:05:13.932068+00:00"},{"alias_kind":"pith_short_12","alias_value":"W5B4B4TEPSC2","created_at":"2026-06-09T01:05:13.932068+00:00"},{"alias_kind":"pith_short_16","alias_value":"W5B4B4TEPSC2WLXR","created_at":"2026-06-09T01:05:13.932068+00:00"},{"alias_kind":"pith_short_8","alias_value":"W5B4B4TE","created_at":"2026-06-09T01:05:13.932068+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.27739","citing_title":"Worker Disagreement Reveals Sharp Directions in Local SGD","ref_index":18,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB","json":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB.json","graph_json":"https://pith.science/api/pith-number/W5B4B4TEPSC2WLXRKBKTJUFNTB/graph.json","events_json":"https://pith.science/api/pith-number/W5B4B4TEPSC2WLXRKBKTJUFNTB/events.json","paper":"https://pith.science/paper/W5B4B4TE"},"agent_actions":{"view_html":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB","download_json":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB.json","view_paper":"https://pith.science/paper/W5B4B4TE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.05600&json=true","fetch_graph":"https://pith.science/api/pith-number/W5B4B4TEPSC2WLXRKBKTJUFNTB/graph.json","fetch_events":"https://pith.science/api/pith-number/W5B4B4TEPSC2WLXRKBKTJUFNTB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB/action/storage_attestation","attest_author":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB/action/author_attestation","sign_citation":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB/action/citation_signature","submit_replication":"https://pith.science/pith/W5B4B4TEPSC2WLXRKBKTJUFNTB/action/replication_record"}},"created_at":"2026-06-09T01:05:13.932068+00:00","updated_at":"2026-06-09T01:05:13.932068+00:00"}