{"total":10,"items":[{"citing_arxiv_id":"2605.06139","ref_index":32,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Listwise Policy Optimization: Group-based RLVR as Target-Projection on the LLM Response Simplex","primary_cat":"cs.LG","submitted_at":"2026-05-07T12:38:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Listwise Policy Optimization explicitly performs target-projection on the LLM response simplex, unifying and improving group-based RLVR methods with monotonic improvement and flexible divergences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05893","ref_index":53,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Logic-Regularized Verifier Elicits Reasoning from LLMs","primary_cat":"cs.CL","submitted_at":"2026-05-07T09:03:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LOVER creates an unsupervised logic-regularized verifier that reaches 95% of supervised verifier performance on reasoning tasks across 10 datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21510","ref_index":85,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"OptiVerse: A Comprehensive Benchmark towards Optimization Problem Solving","primary_cat":"cs.CL","submitted_at":"2026-04-23T10:12:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"OptiVerse is a new benchmark spanning neglected optimization domains that shows LLMs suffer sharp accuracy drops on hard problems due to modeling and logic errors, with a Dual-View Auditor Agent proposed to improve performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20183","ref_index":69,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Dual-Cluster Memory Agent: Resolving Multi-Paradigm Ambiguity in Optimization Problem Solving","primary_cat":"cs.CL","submitted_at":"2026-04-22T04:55:31+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17928","ref_index":86,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"HEALing Entropy Collapse: Enhancing Exploration in Few-Shot RLVR via Hybrid-Domain Entropy Dynamics Alignment","primary_cat":"cs.LG","submitted_at":"2026-04-20T08:09:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HEAL mitigates entropy collapse in few-shot RLVR by selectively adding general-domain data and aligning trajectory-level entropy dynamics, matching full-shot performance with 32 target samples.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.10248","ref_index":93,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Step-Video-T2V Technical Report: The Practice, Challenges, and Future of Video Foundation Model","primary_cat":"cs.CV","submitted_at":"2025-02-14T15:58:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Step-Video-T2V describes a 30B-parameter text-to-video model with custom Video-VAE, 3D DiT, flow matching, and Video-DPO that claims state-of-the-art results on a new internal benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.03387","ref_index":26,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"LIMO: Less is More for Reasoning","primary_cat":"cs.CL","submitted_at":"2025-02-05T17:23:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LIMO achieves 63.3% on AIME24 and 95.6% on MATH500 via supervised fine-tuning on roughly 1% of the data used by prior models, supporting the claim that minimal strategic examples suffice when pre-training has already encoded domain knowledge.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.02737","ref_index":18,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"SmolLM2: When Smol Goes Big -- Data-Centric Training of a Small Language Model","primary_cat":"cs.CL","submitted_at":"2025-02-04T21:43:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SmolLM2 is a 1.7B-parameter language model that outperforms Qwen2.5-1.5B and Llama3.2-1B after overtraining on 11 trillion tokens using custom FineMath, Stack-Edu, and SmolTalk datasets in a multi-stage pipeline.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.01456","ref_index":138,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Process Reinforcement through Implicit Rewards","primary_cat":"cs.LG","submitted_at":"2025-02-03T15:43:48+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PRIME enables online process reward model updates in LLM RL using implicit rewards from rollouts and outcome labels, yielding 15.1% average gains on reasoning benchmarks and surpassing a stronger instruct model with 10% of the data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2405.14782","ref_index":176,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Lessons from the Trenches on Reproducible Evaluation of Language Models","primary_cat":"cs.CL","submitted_at":"2024-05-23T16:50:49+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}