{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2T6KZE534QRGFEICAJOAH6GIKK","short_pith_number":"pith:2T6KZE53","schema_version":"1.0","canonical_sha256":"d4fcac93bbe422629102025c03f8c85286e95ed23f8c20f98c22c596276757ba","source":{"kind":"arxiv","id":"2606.22974","version":1},"attestation_state":"computed","paper":{"title":"When Preferences Fail to Become Incentives: A Utility-Behavior Gap in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Christopher M. Ackerman, Yujun Zhou","submitted_at":"2026-06-22T07:54:15Z","abstract_excerpt":"Recent work on preference elicitation in large language models (LLMs) has demonstrated that, when given a series of choices between two outcomes, LLMs reveal a coherent, model-specific utility structure. Notably, this structure often includes preferences that the models' trainers did not intend, such as valuing people of some nationalities above others, raising the possibility that LLMs might be forming emergent, misaligned goals, which, if true, would have major safety implications. However, the choice paradigms in which these preferences are observed are not reflective of real-world situatio"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.22974","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T07:54:15Z","cross_cats_sorted":[],"title_canon_sha256":"696e726fff56d5340922f542f4271066b3b1423bb27f35a09978f01ac06f6e65","abstract_canon_sha256":"40c9545654836ca5392651565ec17dfbe8151e7d4c8b0cccf2586bec3a694195"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:05.675233Z","signature_b64":"0ca6cvvBTutdfRu22XhB4aaX+4ZA6GjV736VUoC7Xg5FmPkkbWSDTsbjI2x6iGpTB+o3xGefYXwcjha7cO0QAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d4fcac93bbe422629102025c03f8c85286e95ed23f8c20f98c22c596276757ba","last_reissued_at":"2026-06-23T03:14:05.674880Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:05.674880Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"When Preferences Fail to Become Incentives: A Utility-Behavior Gap in Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Christopher M. Ackerman, Yujun Zhou","submitted_at":"2026-06-22T07:54:15Z","abstract_excerpt":"Recent work on preference elicitation in large language models (LLMs) has demonstrated that, when given a series of choices between two outcomes, LLMs reveal a coherent, model-specific utility structure. Notably, this structure often includes preferences that the models' trainers did not intend, such as valuing people of some nationalities above others, raising the possibility that LLMs might be forming emergent, misaligned goals, which, if true, would have major safety implications. However, the choice paradigms in which these preferences are observed are not reflective of real-world situatio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.22974","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.22974/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.22974","created_at":"2026-06-23T03:14:05.674940+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.22974v1","created_at":"2026-06-23T03:14:05.674940+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.22974","created_at":"2026-06-23T03:14:05.674940+00:00"},{"alias_kind":"pith_short_12","alias_value":"2T6KZE534QRG","created_at":"2026-06-23T03:14:05.674940+00:00"},{"alias_kind":"pith_short_16","alias_value":"2T6KZE534QRGFEIC","created_at":"2026-06-23T03:14:05.674940+00:00"},{"alias_kind":"pith_short_8","alias_value":"2T6KZE53","created_at":"2026-06-23T03:14:05.674940+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK","json":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK.json","graph_json":"https://pith.science/api/pith-number/2T6KZE534QRGFEICAJOAH6GIKK/graph.json","events_json":"https://pith.science/api/pith-number/2T6KZE534QRGFEICAJOAH6GIKK/events.json","paper":"https://pith.science/paper/2T6KZE53"},"agent_actions":{"view_html":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK","download_json":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK.json","view_paper":"https://pith.science/paper/2T6KZE53","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.22974&json=true","fetch_graph":"https://pith.science/api/pith-number/2T6KZE534QRGFEICAJOAH6GIKK/graph.json","fetch_events":"https://pith.science/api/pith-number/2T6KZE534QRGFEICAJOAH6GIKK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK/action/storage_attestation","attest_author":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK/action/author_attestation","sign_citation":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK/action/citation_signature","submit_replication":"https://pith.science/pith/2T6KZE534QRGFEICAJOAH6GIKK/action/replication_record"}},"created_at":"2026-06-23T03:14:05.674940+00:00","updated_at":"2026-06-23T03:14:05.674940+00:00"}