{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PIJPU4DGAG6HKFENONK3GHUOQG","short_pith_number":"pith:PIJPU4DG","canonical_record":{"source":{"id":"2605.13537","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T13:47:06Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"15301f46178a0c9abbd2cf925adeec0b22941843232ebea12d1371a06a6438c2","abstract_canon_sha256":"2481e8d665fc0f3b97f22844448dad17070e6ff5a7fbcff3521f9ab6946ed229"},"schema_version":"1.0"},"canonical_sha256":"7a12fa706601bc75148d7355b31e8e81bbdea3e86a96a251d0c1c095d063e000","source":{"kind":"arxiv","id":"2605.13537","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13537","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13537v1","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13537","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"pith_short_12","alias_value":"PIJPU4DGAG6H","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PIJPU4DGAG6HKFEN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PIJPU4DG","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PIJPU4DGAG6HKFENONK3GHUOQG","target":"record","payload":{"canonical_record":{"source":{"id":"2605.13537","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T13:47:06Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"15301f46178a0c9abbd2cf925adeec0b22941843232ebea12d1371a06a6438c2","abstract_canon_sha256":"2481e8d665fc0f3b97f22844448dad17070e6ff5a7fbcff3521f9ab6946ed229"},"schema_version":"1.0"},"canonical_sha256":"7a12fa706601bc75148d7355b31e8e81bbdea3e86a96a251d0c1c095d063e000","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:24.102866Z","signature_b64":"aRHplAYj4W8+41OmBt9jvew7u2LfZyhM0I7oVjp0K6iwLk30h4qHnbuid5n1znAP28SIQmLOL1l0dEL8zLgkCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7a12fa706601bc75148d7355b31e8e81bbdea3e86a96a251d0c1c095d063e000","last_reissued_at":"2026-05-18T02:44:24.102420Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:24.102420Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.13537","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e3c200piihnRwtOq85hARqKdjg6C9foe8h4opzhCgMz5Rs/+A8bSYK8OnoE01i5uR1dkcHtkXr3CUjtta07FAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T01:31:13.394144Z"},"content_sha256":"16e8b27a147aba7ce2d03d6e6a97578bc5edd675c22dc26efe827b7299fa52c7","schema_version":"1.0","event_id":"sha256:16e8b27a147aba7ce2d03d6e6a97578bc5edd675c22dc26efe827b7299fa52c7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PIJPU4DGAG6HKFENONK3GHUOQG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Temper and Tilt Lead to SLOP: Reward Hacking Mitigation with Inference-Time Alignment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Jing Liu, Toshiaki Koike-Akino, Ye Wang","submitted_at":"2026-05-13T13:47:06Z","abstract_excerpt":"Inference-time alignment techniques offer a lightweight alternative or complement to costly reinforcement learning, while enabling continual adaptation as alignment objectives and reward targets evolve. Existing theoretical analyses justify these methods as approximations to sampling from distributions optimally tilted toward a given reward model. We extend these techniques by introducing reference-model temperature adjustment, which leads to further generalization of inference-time alignment to ensembles of generative reward models combined as a sharpened logarithmic opinion pool (SLOP). To m"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we propose an algorithm for calibrating SLOP weight parameters and experimentally demonstrate that it improves robustness while preserving alignment performance.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the proposed calibration algorithm for SLOP weights generalizes beyond the specific experimental setups and that the temperature adjustment reliably extends the theoretical approximations to ensembles without introducing new instabilities.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Temperature adjustment on the reference model generalizes inference-time alignment to SLOP ensembles of reward models, with a calibration algorithm that improves robustness to reward hacking while preserving alignment performance.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f3e54cd3ae87a0b0e83520f93c4e5cd7fe8ec74efb36adc705ad2086b68f859d"},"source":{"id":"2605.13537","kind":"arxiv","version":1},"verdict":{"id":"4a3286de-e7db-4a28-a76e-7c88e5e54782","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:23:35.734164Z","strongest_claim":"we propose an algorithm for calibrating SLOP weight parameters and experimentally demonstrate that it improves robustness while preserving alignment performance.","one_line_summary":"Temperature adjustment on the reference model generalizes inference-time alignment to SLOP ensembles of reward models, with a calibration algorithm that improves robustness to reward hacking while preserving alignment performance.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the proposed calibration algorithm for SLOP weights generalizes beyond the specific experimental setups and that the temperature adjustment reliably extends the theoretical approximations to ensembles without introducing new instabilities.","pith_extraction_headline":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking."},"references":{"count":17,"sample":[{"doi":"","year":null,"title":"Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone","work_id":"feef9556-a016-493c-abd2-0c97a23a7ebf","ref_index":1,"cited_arxiv_id":"2404.14219","is_internal_anchor":true},{"doi":"","year":null,"title":"Best-of-n through the smoothing lens: KL divergence and regret analysis.arXiv preprint arXiv:2507.05913,","work_id":"ccc8aa5e-ef73-48c7-b330-580a9f264058","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","ref_index":3,"cited_arxiv_id":"2511.21631","is_internal_anchor":true},{"doi":"","year":null,"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","ref_index":4,"cited_arxiv_id":"2204.05862","is_internal_anchor":true},{"doi":"","year":null,"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","ref_index":5,"cited_arxiv_id":"2110.14168","is_internal_anchor":true}],"resolved_work":17,"snapshot_sha256":"cafbb8323eff344499c6a04f3a8e62c7643ccb3d8f1de78fe7413351357ceaa4","internal_anchors":10},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bc15b73c2d3815a820dea33133f9b4fa4c4922834ffb2229afe7ffd07ae47ec3"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"4a3286de-e7db-4a28-a76e-7c88e5e54782"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:44:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TBgfqcZjtO7yct5ATDEqlIfD/vOXmm2bFw33DYBzKp21aMDQXFw5skbKoUQhIBPw/DFvlDB14f5rW6XTHoFkBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T01:31:13.394662Z"},"content_sha256":"2aa8afbaef4212f553d2e4e7a4d9ccd4ddff97b4bba782c29dff71bd20d13c72","schema_version":"1.0","event_id":"sha256:2aa8afbaef4212f553d2e4e7a4d9ccd4ddff97b4bba782c29dff71bd20d13c72"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PIJPU4DGAG6HKFENONK3GHUOQG/bundle.json","state_url":"https://pith.science/pith/PIJPU4DGAG6HKFENONK3GHUOQG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PIJPU4DGAG6HKFENONK3GHUOQG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T01:31:13Z","links":{"resolver":"https://pith.science/pith/PIJPU4DGAG6HKFENONK3GHUOQG","bundle":"https://pith.science/pith/PIJPU4DGAG6HKFENONK3GHUOQG/bundle.json","state":"https://pith.science/pith/PIJPU4DGAG6HKFENONK3GHUOQG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PIJPU4DGAG6HKFENONK3GHUOQG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PIJPU4DGAG6HKFENONK3GHUOQG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2481e8d665fc0f3b97f22844448dad17070e6ff5a7fbcff3521f9ab6946ed229","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T13:47:06Z","title_canon_sha256":"15301f46178a0c9abbd2cf925adeec0b22941843232ebea12d1371a06a6438c2"},"schema_version":"1.0","source":{"id":"2605.13537","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.13537","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.13537v1","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13537","created_at":"2026-05-18T02:44:24Z"},{"alias_kind":"pith_short_12","alias_value":"PIJPU4DGAG6H","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PIJPU4DGAG6HKFEN","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PIJPU4DG","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:2aa8afbaef4212f553d2e4e7a4d9ccd4ddff97b4bba782c29dff71bd20d13c72","target":"graph","created_at":"2026-05-18T02:44:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we propose an algorithm for calibrating SLOP weight parameters and experimentally demonstrate that it improves robustness while preserving alignment performance."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the proposed calibration algorithm for SLOP weights generalizes beyond the specific experimental setups and that the temperature adjustment reliably extends the theoretical approximations to ensembles without introducing new instabilities."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Temperature adjustment on the reference model generalizes inference-time alignment to SLOP ensembles of reward models, with a calibration algorithm that improves robustness to reward hacking while preserving alignment performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking."}],"snapshot_sha256":"f3e54cd3ae87a0b0e83520f93c4e5cd7fe8ec74efb36adc705ad2086b68f859d"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bc15b73c2d3815a820dea33133f9b4fa4c4922834ffb2229afe7ffd07ae47ec3"},"paper":{"abstract_excerpt":"Inference-time alignment techniques offer a lightweight alternative or complement to costly reinforcement learning, while enabling continual adaptation as alignment objectives and reward targets evolve. Existing theoretical analyses justify these methods as approximations to sampling from distributions optimally tilted toward a given reward model. We extend these techniques by introducing reference-model temperature adjustment, which leads to further generalization of inference-time alignment to ensembles of generative reward models combined as a sharpened logarithmic opinion pool (SLOP). To m","authors_text":"Jing Liu, Toshiaki Koike-Akino, Ye Wang","cross_cats":["cs.AI","cs.CL"],"headline":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T13:47:06Z","title":"Temper and Tilt Lead to SLOP: Reward Hacking Mitigation with Inference-Time Alignment"},"references":{"count":17,"internal_anchors":10,"resolved_work":17,"sample":[{"cited_arxiv_id":"2404.14219","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone","work_id":"feef9556-a016-493c-abd2-0c97a23a7ebf","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Best-of-n through the smoothing lens: KL divergence and regret analysis.arXiv preprint arXiv:2507.05913,","work_id":"ccc8aa5e-ef73-48c7-b330-580a9f264058","year":null},{"cited_arxiv_id":"2511.21631","doi":"","is_internal_anchor":true,"ref_index":3,"title":"Qwen3-VL Technical Report","work_id":"1fe243aa-e3c0-4da6-b391-4cbcfc88d5c0","year":null},{"cited_arxiv_id":"2204.05862","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback","work_id":"a1f2574b-a899-4713-be60-c87ba332656c","year":null},{"cited_arxiv_id":"2110.14168","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","year":null}],"snapshot_sha256":"cafbb8323eff344499c6a04f3a8e62c7643ccb3d8f1de78fe7413351357ceaa4"},"source":{"id":"2605.13537","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:23:35.734164Z","id":"4a3286de-e7db-4a28-a76e-7c88e5e54782","model_set":{"reader":"grok-4.3"},"one_line_summary":"Temperature adjustment on the reference model generalizes inference-time alignment to SLOP ensembles of reward models, with a calibration algorithm that improves robustness to reward hacking while preserving alignment performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Adjusting reference-model temperature generalizes inference-time alignment to ensembles of reward models as a sharpened logarithmic opinion pool whose weights can be calibrated to reduce reward hacking.","strongest_claim":"we propose an algorithm for calibrating SLOP weight parameters and experimentally demonstrate that it improves robustness while preserving alignment performance.","weakest_assumption":"That the proposed calibration algorithm for SLOP weights generalizes beyond the specific experimental setups and that the temperature adjustment reliably extends the theoretical approximations to ensembles without introducing new instabilities."}},"verdict_id":"4a3286de-e7db-4a28-a76e-7c88e5e54782"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:16e8b27a147aba7ce2d03d6e6a97578bc5edd675c22dc26efe827b7299fa52c7","target":"record","created_at":"2026-05-18T02:44:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2481e8d665fc0f3b97f22844448dad17070e6ff5a7fbcff3521f9ab6946ed229","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T13:47:06Z","title_canon_sha256":"15301f46178a0c9abbd2cf925adeec0b22941843232ebea12d1371a06a6438c2"},"schema_version":"1.0","source":{"id":"2605.13537","kind":"arxiv","version":1}},"canonical_sha256":"7a12fa706601bc75148d7355b31e8e81bbdea3e86a96a251d0c1c095d063e000","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7a12fa706601bc75148d7355b31e8e81bbdea3e86a96a251d0c1c095d063e000","first_computed_at":"2026-05-18T02:44:24.102420Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:44:24.102420Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"aRHplAYj4W8+41OmBt9jvew7u2LfZyhM0I7oVjp0K6iwLk30h4qHnbuid5n1znAP28SIQmLOL1l0dEL8zLgkCA==","signature_status":"signed_v1","signed_at":"2026-05-18T02:44:24.102866Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.13537","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:16e8b27a147aba7ce2d03d6e6a97578bc5edd675c22dc26efe827b7299fa52c7","sha256:2aa8afbaef4212f553d2e4e7a4d9ccd4ddff97b4bba782c29dff71bd20d13c72"],"state_sha256":"ec07cf6bbb965ed85608958f2932689f3bf2a6ac27ad719ddff57014a5a49b1c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZJXEUFSq+/IM+1UzvFZgs2Pss9EzOZXmQQDEE1UxwgenNcZbwxnZPE1vGtFdghK2KuDlzKPRD0VOv9Iatu/0BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T01:31:13.397275Z","bundle_sha256":"b35ef393acad259b238d2c8ed87287b020abaa8eb198709b39351135a72aa9b5"}}