{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:PT6R2GSCJQCM7RXOLU3ZJKQYA2","short_pith_number":"pith:PT6R2GSC","schema_version":"1.0","canonical_sha256":"7cfd1d1a424c04cfc6ee5d3794aa180691922fa127aafebc18a757c20b5ce27c","source":{"kind":"arxiv","id":"2505.15134","version":1},"attestation_state":"computed","paper":{"title":"The Unreasonable Effectiveness of Entropy Minimization in LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Hao Peng, Jiawei Han, Lifan Yuan, Shivam Agarwal, Zimin Zhang","submitted_at":"2025-05-21T05:39:11Z","abstract_excerpt":"Entropy minimization (EM) trains the model to concentrate even more probability mass on its most confident outputs. We show that this simple objective alone, without any labeled data, can substantially improve large language models' (LLMs) performance on challenging math, physics, and coding tasks. We explore three approaches: (1) EM-FT minimizes token-level entropy similarly to instruction finetuning, but on unlabeled outputs drawn from the model; (2) EM-RL: reinforcement learning with negative entropy as the only reward to maximize; (3) EM-INF: inference-time logit adjustment to reduce entro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2505.15134","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-05-21T05:39:11Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d9894eeabec8995c7a6457057dd3de41a67a130d074e20fc4b97dd3816ad5c8b","abstract_canon_sha256":"3fd25ebf71377a64e7fa02fe570c2d7f7479b2e199593ac0b9998a78f98797a0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T15:51:37.296888Z","signature_b64":"u7RA2ehIyPSyBqE0KX8YlACpWlVpPk3fJgmKgYhxonr7YgBs7Wc87+zOj4MYo5EfGMvtC9JL8Wcb/f38PBB/CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7cfd1d1a424c04cfc6ee5d3794aa180691922fa127aafebc18a757c20b5ce27c","last_reissued_at":"2026-05-18T15:51:37.294792Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T15:51:37.294792Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Unreasonable Effectiveness of Entropy Minimization in LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Hao Peng, Jiawei Han, Lifan Yuan, Shivam Agarwal, Zimin Zhang","submitted_at":"2025-05-21T05:39:11Z","abstract_excerpt":"Entropy minimization (EM) trains the model to concentrate even more probability mass on its most confident outputs. We show that this simple objective alone, without any labeled data, can substantially improve large language models' (LLMs) performance on challenging math, physics, and coding tasks. We explore three approaches: (1) EM-FT minimizes token-level entropy similarly to instruction finetuning, but on unlabeled outputs drawn from the model; (2) EM-RL: reinforcement learning with negative entropy as the only reward to maximize; (3) EM-INF: inference-time logit adjustment to reduce entro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.15134","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2505.15134","created_at":"2026-05-18T15:51:37.294914+00:00"},{"alias_kind":"arxiv_version","alias_value":"2505.15134v1","created_at":"2026-05-18T15:51:37.294914+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.15134","created_at":"2026-05-18T15:51:37.294914+00:00"},{"alias_kind":"pith_short_12","alias_value":"PT6R2GSCJQCM","created_at":"2026-05-18T15:51:37.294914+00:00"},{"alias_kind":"pith_short_16","alias_value":"PT6R2GSCJQCM7RXO","created_at":"2026-05-18T15:51:37.294914+00:00"},{"alias_kind":"pith_short_8","alias_value":"PT6R2GSC","created_at":"2026-05-18T15:51:37.294914+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":21,"internal_anchor_count":21,"sample":[{"citing_arxiv_id":"2509.05489","citing_title":"Self-Aligned Reward: Towards Effective and Efficient Reasoners","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2509.14234","citing_title":"Compute as Teacher: Turning Inference Compute Into Reference-Free Supervision","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2509.21882","citing_title":"Position: The Hidden Costs and Measurement Gaps of Reinforcement Learning with Verifiable Rewards","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2509.20265","citing_title":"Failure Modes of Maximum Entropy RLHF","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2510.18814","citing_title":"A Model Can Help Itself: Reward-Free Self-Training for LLM Reasoning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2509.08827","citing_title":"A Survey of Reinforcement Learning for Large Reasoning Models","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2602.06019","citing_title":"Multi-Token Prediction via Self-Distillation","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06192","citing_title":"The Stepwise Informativeness Assumption: Why are Entropy Dynamics and Reasoning Correlated in LLMs?","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2603.27977","citing_title":"SARL: Label-Free Reinforcement Learning by Rewarding Reasoning Topology","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03993","citing_title":"Can LLMs Learn to Reason Robustly under Noisy Supervision?","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11461","citing_title":"Breaking $\\textit{Winner-Takes-All}$: Cooperative Policy Optimization Improves Diverse LLM Reasoning","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08186","citing_title":"Rethinking Entropy Minimization in Test-Time Adaptation for Autoregressive Models","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2503.09567","citing_title":"Towards Reasoning Era: A Survey of Long Chain-of-Thought for Reasoning Large Language Models","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06241","citing_title":"Rethinking RL for LLM Reasoning: It's Sparse Policy Selection, Not Capability Learning","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00195","citing_title":"Diversity in Large Language Models under Supervised Fine-Tuning","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06241","citing_title":"Rethinking RL for LLM Reasoning: It's Sparse Policy Selection, Not Capability Learning","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04065","citing_title":"Free Energy-Driven Reinforcement Learning with Adaptive Advantage Shaping for Unsupervised Reasoning in LLMs","ref_index":58,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07244","citing_title":"Experience Sharing in Mutual Reinforcement Learning for Heterogeneous Language Models","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18493","citing_title":"Too Correct to Learn: Reinforcement Learning on Saturated Reasoning Data","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.17928","citing_title":"HEALing Entropy Collapse: Enhancing Exploration in Few-Shot RLVR via Hybrid-Domain Entropy Dynamics Alignment","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00195","citing_title":"Diversity in Large Language Models under Supervised Fine-Tuning","ref_index":30,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2","json":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2.json","graph_json":"https://pith.science/api/pith-number/PT6R2GSCJQCM7RXOLU3ZJKQYA2/graph.json","events_json":"https://pith.science/api/pith-number/PT6R2GSCJQCM7RXOLU3ZJKQYA2/events.json","paper":"https://pith.science/paper/PT6R2GSC"},"agent_actions":{"view_html":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2","download_json":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2.json","view_paper":"https://pith.science/paper/PT6R2GSC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2505.15134&json=true","fetch_graph":"https://pith.science/api/pith-number/PT6R2GSCJQCM7RXOLU3ZJKQYA2/graph.json","fetch_events":"https://pith.science/api/pith-number/PT6R2GSCJQCM7RXOLU3ZJKQYA2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2/action/storage_attestation","attest_author":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2/action/author_attestation","sign_citation":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2/action/citation_signature","submit_replication":"https://pith.science/pith/PT6R2GSCJQCM7RXOLU3ZJKQYA2/action/replication_record"}},"created_at":"2026-05-18T15:51:37.294914+00:00","updated_at":"2026-05-18T15:51:37.294914+00:00"}