{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:HPCNDVJZVS232BRQFIMTV73FYZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f5ddf6745e09772a4a56b67cd5d1b605113a718dbc27bec0c357751c96bd7fa4","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-30T07:52:14Z","title_canon_sha256":"b89165e446e1ec949cb8b9a5c903009322141d1395bb390cc48c4acb0693f7c5"},"schema_version":"1.0","source":{"id":"2510.26219","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.26219","created_at":"2026-06-04T01:08:35Z"},{"alias_kind":"arxiv_version","alias_value":"2510.26219v3","created_at":"2026-06-04T01:08:35Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.26219","created_at":"2026-06-04T01:08:35Z"},{"alias_kind":"pith_short_12","alias_value":"HPCNDVJZVS23","created_at":"2026-06-04T01:08:35Z"},{"alias_kind":"pith_short_16","alias_value":"HPCNDVJZVS232BRQ","created_at":"2026-06-04T01:08:35Z"},{"alias_kind":"pith_short_8","alias_value":"HPCNDVJZ","created_at":"2026-06-04T01:08:35Z"}],"graph_snapshots":[{"event_id":"sha256:b26301d4985bda98796c0de021cac118890803ea60e56bd1d042805f99f66830","target":"graph","created_at":"2026-06-04T01:08:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.26219/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Test-time alignment of large language models (LLMs) attracts attention because fine-tuning of LLMs requires high computational costs. In this paper, we propose a new test-time reward-guided alignment method called adaptive importance sampling on pre-logits (AISP) on the basis of the sampling-based model predictive control with the stochastic control input. AISP applies the Gaussian perturbation into pre-logits, which are outputs of the penultimate layer, so as to maximize expected rewards with respect to the mean of the perturbation. We demonstrate that the optimal mean is obtained by importan","authors_text":"Haru Kuroki, Hiroshi Takahashi, Kazumune Hashimoto, Sekitoshi Kanai, Tsukasa Yoshida","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-30T07:52:14Z","title":"Test-time reward-guided alignment of language models by importance sampling on pre-logit space"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.26219","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8b226acf228ede096a35b80468e22187f7537668c60341e9d3d3187ba75faa06","target":"record","created_at":"2026-06-04T01:08:35Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f5ddf6745e09772a4a56b67cd5d1b605113a718dbc27bec0c357751c96bd7fa4","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-30T07:52:14Z","title_canon_sha256":"b89165e446e1ec949cb8b9a5c903009322141d1395bb390cc48c4acb0693f7c5"},"schema_version":"1.0","source":{"id":"2510.26219","kind":"arxiv","version":3}},"canonical_sha256":"3bc4d1d539acb5bd06302a193aff65c6764bb957950187b5708efeeeef56cfb1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3bc4d1d539acb5bd06302a193aff65c6764bb957950187b5708efeeeef56cfb1","first_computed_at":"2026-06-04T01:08:35.176929Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-04T01:08:35.176929Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5pNMYk8+MhTEf+vJUdnWt4ElClaPchp0/aHeMAxI4NzjmWVJ594l+gPwmfpCaoBcVBrY8DbwoOQgFCzDKnNeBg==","signature_status":"signed_v1","signed_at":"2026-06-04T01:08:35.177861Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.26219","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8b226acf228ede096a35b80468e22187f7537668c60341e9d3d3187ba75faa06","sha256:b26301d4985bda98796c0de021cac118890803ea60e56bd1d042805f99f66830"],"state_sha256":"d4e6e638c8c508a7cda7f5bb9ed5f8e5796c122b3d983fb0411737c813d413ff"}