{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:BHSZP2HLN4GUICOBITZSQALFSK","short_pith_number":"pith:BHSZP2HL","schema_version":"1.0","canonical_sha256":"09e597e8eb6f0d4409c144f328016592a01eb29f63da4b0ee922f7c9aebb7a66","source":{"kind":"arxiv","id":"1502.04390","version":2},"attestation_state":"computed","paper":{"title":"Equilibrated adaptive learning rates for non-convex optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NA"],"primary_cat":"cs.LG","authors_text":"Harm de Vries, Yann N. Dauphin, Yoshua Bengio","submitted_at":"2015-02-15T23:41:33Z","abstract_excerpt":"Parameter-specific adaptive learning rate methods are computationally efficient ways to reduce the ill-conditioning problems encountered when training large deep networks. Following recent work that strongly suggests that most of the critical points encountered when training such networks are saddle points, we find how considering the presence of negative eigenvalues of the Hessian could help us design better suited adaptive learning rate schemes. We show that the popular Jacobi preconditioner has undesirable behavior in the presence of both positive and negative curvature, and present theoret"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1502.04390","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-02-15T23:41:33Z","cross_cats_sorted":["cs.NA"],"title_canon_sha256":"0cfc00d7546c94883e7b12784dd67868c0546328032a18e2ea4f741547cc3979","abstract_canon_sha256":"3b89e06210014d8eb017527f90b89c78d717ea56504ccbd031f2dd1eec837bd3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:34:35.089679Z","signature_b64":"MzRI5dyoKRxhRdSNkG9BUQlPOrtNCo1TYWCAKTkH+tOT/k9roCE9H8+jbQ8HwczuW0BeSQfUWspL3Q5GPXUmAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"09e597e8eb6f0d4409c144f328016592a01eb29f63da4b0ee922f7c9aebb7a66","last_reissued_at":"2026-05-18T01:34:35.089105Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:34:35.089105Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Equilibrated adaptive learning rates for non-convex optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NA"],"primary_cat":"cs.LG","authors_text":"Harm de Vries, Yann N. Dauphin, Yoshua Bengio","submitted_at":"2015-02-15T23:41:33Z","abstract_excerpt":"Parameter-specific adaptive learning rate methods are computationally efficient ways to reduce the ill-conditioning problems encountered when training large deep networks. Following recent work that strongly suggests that most of the critical points encountered when training such networks are saddle points, we find how considering the presence of negative eigenvalues of the Hessian could help us design better suited adaptive learning rate schemes. We show that the popular Jacobi preconditioner has undesirable behavior in the presence of both positive and negative curvature, and present theoret"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1502.04390","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1502.04390","created_at":"2026-05-18T01:34:35.089197+00:00"},{"alias_kind":"arxiv_version","alias_value":"1502.04390v2","created_at":"2026-05-18T01:34:35.089197+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1502.04390","created_at":"2026-05-18T01:34:35.089197+00:00"},{"alias_kind":"pith_short_12","alias_value":"BHSZP2HLN4GU","created_at":"2026-05-18T12:29:14.074870+00:00"},{"alias_kind":"pith_short_16","alias_value":"BHSZP2HLN4GUICOB","created_at":"2026-05-18T12:29:14.074870+00:00"},{"alias_kind":"pith_short_8","alias_value":"BHSZP2HL","created_at":"2026-05-18T12:29:14.074870+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.08674","citing_title":"Deep Learning to Address Candidate Generation and Cold Start Challenges in Recommender Systems: A Research Survey","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"1511.06939","citing_title":"Session-based Recommendations with Recurrent Neural Networks","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"1608.03983","citing_title":"SGDR: Stochastic Gradient Descent with Warm Restarts","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK","json":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK.json","graph_json":"https://pith.science/api/pith-number/BHSZP2HLN4GUICOBITZSQALFSK/graph.json","events_json":"https://pith.science/api/pith-number/BHSZP2HLN4GUICOBITZSQALFSK/events.json","paper":"https://pith.science/paper/BHSZP2HL"},"agent_actions":{"view_html":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK","download_json":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK.json","view_paper":"https://pith.science/paper/BHSZP2HL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1502.04390&json=true","fetch_graph":"https://pith.science/api/pith-number/BHSZP2HLN4GUICOBITZSQALFSK/graph.json","fetch_events":"https://pith.science/api/pith-number/BHSZP2HLN4GUICOBITZSQALFSK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK/action/storage_attestation","attest_author":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK/action/author_attestation","sign_citation":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK/action/citation_signature","submit_replication":"https://pith.science/pith/BHSZP2HLN4GUICOBITZSQALFSK/action/replication_record"}},"created_at":"2026-05-18T01:34:35.089197+00:00","updated_at":"2026-05-18T01:34:35.089197+00:00"}