{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AY2VTHFUKUOZIPRQPIJHIRBPCM","short_pith_number":"pith:AY2VTHFU","canonical_record":{"source":{"id":"2606.03102","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T03:42:04Z","cross_cats_sorted":[],"title_canon_sha256":"edeb73f909d3fff31a6f24be7fbf967bd059c061754d492226eb4c12613ca7c7","abstract_canon_sha256":"b1a1d3ee10ca12812d2c3866c9985e32ba5af2294bff00a70148b6e5e225c843"},"schema_version":"1.0"},"canonical_sha256":"0635599cb4551d943e307a1274442f1330c947971d74b772bc90a3ac7ba03229","source":{"kind":"arxiv","id":"2606.03102","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03102","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03102v1","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03102","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_12","alias_value":"AY2VTHFUKUOZ","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_16","alias_value":"AY2VTHFUKUOZIPRQ","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_8","alias_value":"AY2VTHFU","created_at":"2026-06-03T01:05:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AY2VTHFUKUOZIPRQPIJHIRBPCM","target":"record","payload":{"canonical_record":{"source":{"id":"2606.03102","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T03:42:04Z","cross_cats_sorted":[],"title_canon_sha256":"edeb73f909d3fff31a6f24be7fbf967bd059c061754d492226eb4c12613ca7c7","abstract_canon_sha256":"b1a1d3ee10ca12812d2c3866c9985e32ba5af2294bff00a70148b6e5e225c843"},"schema_version":"1.0"},"canonical_sha256":"0635599cb4551d943e307a1274442f1330c947971d74b772bc90a3ac7ba03229","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:31.728473Z","signature_b64":"sXR8TxiAdccUEc0dJhO90KmqxQnuFLa0rjsj6wSODvgmtDsi2LIQAIyV3pwxQkrSYckEhrIV8reny+jAgB6yBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0635599cb4551d943e307a1274442f1330c947971d74b772bc90a3ac7ba03229","last_reissued_at":"2026-06-03T01:05:31.728094Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:31.728094Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.03102","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hOJKQsB+WmOHsbFGl7F1g8Ws+5igIFz3maTDFZ+0rkTTVjO+t0dJIZpvShEFPDGbyVBZMg1P0JDl39nqBO8YAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T21:05:19.549963Z"},"content_sha256":"cd4a1406261af3cff5d1c97a9b2533504157f4b79352ca6030038e825a35a129","schema_version":"1.0","event_id":"sha256:cd4a1406261af3cff5d1c97a9b2533504157f4b79352ca6030038e825a35a129"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AY2VTHFUKUOZIPRQPIJHIRBPCM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Small RL Controller, Large Language Model: RL-Guided Adaptive Sampling for Test-Time Scaling","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chengsong Huang, Hongtu Zhu, Rui Liu, Runpeng Dai, Tong Zheng","submitted_at":"2026-06-02T03:42:04Z","abstract_excerpt":"Test-time scaling improves the reasoning performance of large language models but incurs substantial cost in both total computation and latency. Existing adaptive sampling methods partially mitigate this issue by dynamically deciding when to stop sampling, yet they typically rely on heuristic rules or rely on distribution assumptions. In this work, we formulate adaptive sampling as a Markov decision process (MDP). We train a lightweight sampling controller with reinforcement learning (RL) to jointly balance answer correctness, latency, and computation cost. At each round, the controller decide"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03102","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03102/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hgyRv8JKkesQ2DS4g+6/cEoZNMbUxerFa6h7aDZZEJlP67yODCrKpUlDcZAHmg+nW694x0fT8bL23JE0IehTAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T21:05:19.550396Z"},"content_sha256":"f2ca6dce9fb184171e0c2886b5737c86afeac3b6426889d0d3870644d57d8d18","schema_version":"1.0","event_id":"sha256:f2ca6dce9fb184171e0c2886b5737c86afeac3b6426889d0d3870644d57d8d18"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/bundle.json","state_url":"https://pith.science/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T21:05:19Z","links":{"resolver":"https://pith.science/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM","bundle":"https://pith.science/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/bundle.json","state":"https://pith.science/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AY2VTHFUKUOZIPRQPIJHIRBPCM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AY2VTHFUKUOZIPRQPIJHIRBPCM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b1a1d3ee10ca12812d2c3866c9985e32ba5af2294bff00a70148b6e5e225c843","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T03:42:04Z","title_canon_sha256":"edeb73f909d3fff31a6f24be7fbf967bd059c061754d492226eb4c12613ca7c7"},"schema_version":"1.0","source":{"id":"2606.03102","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03102","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03102v1","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03102","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_12","alias_value":"AY2VTHFUKUOZ","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_16","alias_value":"AY2VTHFUKUOZIPRQ","created_at":"2026-06-03T01:05:31Z"},{"alias_kind":"pith_short_8","alias_value":"AY2VTHFU","created_at":"2026-06-03T01:05:31Z"}],"graph_snapshots":[{"event_id":"sha256:f2ca6dce9fb184171e0c2886b5737c86afeac3b6426889d0d3870644d57d8d18","target":"graph","created_at":"2026-06-03T01:05:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.03102/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Test-time scaling improves the reasoning performance of large language models but incurs substantial cost in both total computation and latency. Existing adaptive sampling methods partially mitigate this issue by dynamically deciding when to stop sampling, yet they typically rely on heuristic rules or rely on distribution assumptions. In this work, we formulate adaptive sampling as a Markov decision process (MDP). We train a lightweight sampling controller with reinforcement learning (RL) to jointly balance answer correctness, latency, and computation cost. At each round, the controller decide","authors_text":"Chengsong Huang, Hongtu Zhu, Rui Liu, Runpeng Dai, Tong Zheng","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T03:42:04Z","title":"Small RL Controller, Large Language Model: RL-Guided Adaptive Sampling for Test-Time Scaling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03102","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cd4a1406261af3cff5d1c97a9b2533504157f4b79352ca6030038e825a35a129","target":"record","created_at":"2026-06-03T01:05:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b1a1d3ee10ca12812d2c3866c9985e32ba5af2294bff00a70148b6e5e225c843","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T03:42:04Z","title_canon_sha256":"edeb73f909d3fff31a6f24be7fbf967bd059c061754d492226eb4c12613ca7c7"},"schema_version":"1.0","source":{"id":"2606.03102","kind":"arxiv","version":1}},"canonical_sha256":"0635599cb4551d943e307a1274442f1330c947971d74b772bc90a3ac7ba03229","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0635599cb4551d943e307a1274442f1330c947971d74b772bc90a3ac7ba03229","first_computed_at":"2026-06-03T01:05:31.728094Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T01:05:31.728094Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sXR8TxiAdccUEc0dJhO90KmqxQnuFLa0rjsj6wSODvgmtDsi2LIQAIyV3pwxQkrSYckEhrIV8reny+jAgB6yBA==","signature_status":"signed_v1","signed_at":"2026-06-03T01:05:31.728473Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.03102","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cd4a1406261af3cff5d1c97a9b2533504157f4b79352ca6030038e825a35a129","sha256:f2ca6dce9fb184171e0c2886b5737c86afeac3b6426889d0d3870644d57d8d18"],"state_sha256":"baada51b2d588a7f116dce498ead3addddef2f6df5460963c0cedccacc09beb0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jdgJdr47xv+hUTeSCPrM1CmTQIwuVbRALHQD3Iy4mU/tg7/90LDuw6UPSsVkideVBpSkLMdm1YSkC+yEZ+U2Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T21:05:19.553551Z","bundle_sha256":"01f9b05289d3bcbee4a0b48f4cfbd7e34ed56c4a3e3cdb4041690c8bc4230417"}}