{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6TJHK3DANTSQHPOGC26YROMW27","short_pith_number":"pith:6TJHK3DA","schema_version":"1.0","canonical_sha256":"f4d2756c606ce503bdc616bd88b996d7c9492cf44ff1b13487e7fd7cc71cc5ea","source":{"kind":"arxiv","id":"2606.19750","version":1},"attestation_state":"computed","paper":{"title":"Manifold Bandits: Bayesian Curriculum Learning over the Latent Geometry of Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Darrien McKenzie, Nicklas Hansen, Xiaolong Wang","submitted_at":"2026-06-18T03:31:19Z","abstract_excerpt":"Reinforcement learning (RL) is a central approach for improving reasoning capabilities in large language models (LLMs), where training efficiency depends critically on how problems are sampled during optimization. Existing adaptive curriculum learning methods typically prioritize prompts of intermediate difficulty, treating problem selection as a standard bandit problem with independent arms and overlooking the structured, heterogeneous nature of the task space. In this work, we frame problem sampling as a manifold-structured bandit problem with endogenous non-stationarity: problems are relate"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.19750","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-18T03:31:19Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"6f58aa9aa410f859be3d6cd02990fa3484c2aa13525b82862f35d323b34d4260","abstract_canon_sha256":"bcc2d13a0218a2881c2baeb340c1b118c0cc79cf17a3cfb94c598e001dcf6175"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:34.067922Z","signature_b64":"Sg+b7rqbwwFz2CeAdg2ozPxlkoUmmdYnjFtAtoFsAA4NcMlollf8NYWwLTlBMzfiSo3bCE4KjXXlfu3eVWLuAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f4d2756c606ce503bdc616bd88b996d7c9492cf44ff1b13487e7fd7cc71cc5ea","last_reissued_at":"2026-06-19T16:12:34.067480Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:34.067480Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Manifold Bandits: Bayesian Curriculum Learning over the Latent Geometry of Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Darrien McKenzie, Nicklas Hansen, Xiaolong Wang","submitted_at":"2026-06-18T03:31:19Z","abstract_excerpt":"Reinforcement learning (RL) is a central approach for improving reasoning capabilities in large language models (LLMs), where training efficiency depends critically on how problems are sampled during optimization. Existing adaptive curriculum learning methods typically prioritize prompts of intermediate difficulty, treating problem selection as a standard bandit problem with independent arms and overlooking the structured, heterogeneous nature of the task space. In this work, we frame problem sampling as a manifold-structured bandit problem with endogenous non-stationarity: problems are relate"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.19750","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.19750/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.19750","created_at":"2026-06-19T16:12:34.067537+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.19750v1","created_at":"2026-06-19T16:12:34.067537+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.19750","created_at":"2026-06-19T16:12:34.067537+00:00"},{"alias_kind":"pith_short_12","alias_value":"6TJHK3DANTSQ","created_at":"2026-06-19T16:12:34.067537+00:00"},{"alias_kind":"pith_short_16","alias_value":"6TJHK3DANTSQHPOG","created_at":"2026-06-19T16:12:34.067537+00:00"},{"alias_kind":"pith_short_8","alias_value":"6TJHK3DA","created_at":"2026-06-19T16:12:34.067537+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27","json":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27.json","graph_json":"https://pith.science/api/pith-number/6TJHK3DANTSQHPOGC26YROMW27/graph.json","events_json":"https://pith.science/api/pith-number/6TJHK3DANTSQHPOGC26YROMW27/events.json","paper":"https://pith.science/paper/6TJHK3DA"},"agent_actions":{"view_html":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27","download_json":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27.json","view_paper":"https://pith.science/paper/6TJHK3DA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.19750&json=true","fetch_graph":"https://pith.science/api/pith-number/6TJHK3DANTSQHPOGC26YROMW27/graph.json","fetch_events":"https://pith.science/api/pith-number/6TJHK3DANTSQHPOGC26YROMW27/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27/action/storage_attestation","attest_author":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27/action/author_attestation","sign_citation":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27/action/citation_signature","submit_replication":"https://pith.science/pith/6TJHK3DANTSQHPOGC26YROMW27/action/replication_record"}},"created_at":"2026-06-19T16:12:34.067537+00:00","updated_at":"2026-06-19T16:12:34.067537+00:00"}