{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:BRE35RWMEOOE3D3ER3GXTXI7IO","short_pith_number":"pith:BRE35RWM","schema_version":"1.0","canonical_sha256":"0c49bec6cc239c4d8f648ecd79dd1f43ba9c3cfe24e911100c8b9c223c99a27a","source":{"kind":"arxiv","id":"1711.02257","version":4},"attestation_state":"computed","paper":{"title":"GradNorm: Gradient Normalization for Adaptive Loss Balancing in Deep Multitask Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Andrew Rabinovich, Chen-Yu Lee, Vijay Badrinarayanan, Zhao Chen","submitted_at":"2017-11-07T02:08:12Z","abstract_excerpt":"Deep multitask networks, in which one neural network produces multiple predictive outputs, can offer better speed and performance than their single-task counterparts but are challenging to train properly. We present a gradient normalization (GradNorm) algorithm that automatically balances training in deep multitask models by dynamically tuning gradient magnitudes. We show that for various network architectures, for both regression and classification tasks, and on both synthetic and real datasets, GradNorm improves accuracy and reduces overfitting across multiple tasks when compared to single-t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.02257","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-07T02:08:12Z","cross_cats_sorted":[],"title_canon_sha256":"3dd1e06abb8968875118ab0d2836fa68110834865fae413fcb29e3e58c3269e6","abstract_canon_sha256":"30ad3e4c706cf413da85030f4476ce2a51dcaabadfb013a93a53c59f3cd40cb4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:10:48.882368Z","signature_b64":"q6OwSy6sxkTaaln/8/xpSVRAhk3e7S4hdhmeopLQ7x93ADcBBwtr7nkWosf2x/A0rMHT2i2iUYVOcEcVOvt7AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0c49bec6cc239c4d8f648ecd79dd1f43ba9c3cfe24e911100c8b9c223c99a27a","last_reissued_at":"2026-05-18T00:10:48.881783Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:10:48.881783Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GradNorm: Gradient Normalization for Adaptive Loss Balancing in Deep Multitask Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Andrew Rabinovich, Chen-Yu Lee, Vijay Badrinarayanan, Zhao Chen","submitted_at":"2017-11-07T02:08:12Z","abstract_excerpt":"Deep multitask networks, in which one neural network produces multiple predictive outputs, can offer better speed and performance than their single-task counterparts but are challenging to train properly. We present a gradient normalization (GradNorm) algorithm that automatically balances training in deep multitask models by dynamically tuning gradient magnitudes. We show that for various network architectures, for both regression and classification tasks, and on both synthetic and real datasets, GradNorm improves accuracy and reduces overfitting across multiple tasks when compared to single-t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.02257","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.02257","created_at":"2026-05-18T00:10:48.881872+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.02257v4","created_at":"2026-05-18T00:10:48.881872+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.02257","created_at":"2026-05-18T00:10:48.881872+00:00"},{"alias_kind":"pith_short_12","alias_value":"BRE35RWMEOOE","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_16","alias_value":"BRE35RWMEOOE3D3E","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_8","alias_value":"BRE35RWM","created_at":"2026-05-18T12:31:08.081275+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"1906.09417","citing_title":"Keyword Spotting for Hearing Assistive Devices Robust to External Speakers","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2509.17189","citing_title":"Toward a unified data-driven turbulence model through multi-objective learning","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2511.02153","citing_title":"A Joint Variational Framework for Multimodal X-ray Ptychography and Fluorescence Reconstruction","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2603.07433","citing_title":"Data Agent: Learning to Select Data via End-to-End Dynamic Optimization","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05807","citing_title":"Constraint-Driven Warm-Freeze for Efficient Transfer Learning in Photovoltaic Systems","ref_index":25,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15785","citing_title":"Probabilistic Upscaling of Hydrodynamics in Geological Fractures Under Uncertainty","ref_index":29,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO","json":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO.json","graph_json":"https://pith.science/api/pith-number/BRE35RWMEOOE3D3ER3GXTXI7IO/graph.json","events_json":"https://pith.science/api/pith-number/BRE35RWMEOOE3D3ER3GXTXI7IO/events.json","paper":"https://pith.science/paper/BRE35RWM"},"agent_actions":{"view_html":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO","download_json":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO.json","view_paper":"https://pith.science/paper/BRE35RWM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.02257&json=true","fetch_graph":"https://pith.science/api/pith-number/BRE35RWMEOOE3D3ER3GXTXI7IO/graph.json","fetch_events":"https://pith.science/api/pith-number/BRE35RWMEOOE3D3ER3GXTXI7IO/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO/action/storage_attestation","attest_author":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO/action/author_attestation","sign_citation":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO/action/citation_signature","submit_replication":"https://pith.science/pith/BRE35RWMEOOE3D3ER3GXTXI7IO/action/replication_record"}},"created_at":"2026-05-18T00:10:48.881872+00:00","updated_at":"2026-05-18T00:10:48.881872+00:00"}