{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:TXA7XASWTYJWVSMDZLVQCVWHTE","short_pith_number":"pith:TXA7XASW","canonical_record":{"source":{"id":"2305.17926","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-05-29T07:41:03Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"0ba7c25aed0362032899ff9fac27d26763553d15813c42c934f6e07274c9398c","abstract_canon_sha256":"e107e13651404e94eae5d12c7e083470d09dac7978c012369e39c8927b4ec367"},"schema_version":"1.0"},"canonical_sha256":"9dc1fb82569e136ac983caeb0156c79920a0c1cf5b2d10e6e2cfada985e5d478","source":{"kind":"arxiv","id":"2305.17926","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2305.17926","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2305.17926v2","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2305.17926","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"TXA7XASWTYJW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"TXA7XASWTYJWVSMD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"TXA7XASW","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:TXA7XASWTYJWVSMDZLVQCVWHTE","target":"record","payload":{"canonical_record":{"source":{"id":"2305.17926","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-05-29T07:41:03Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"0ba7c25aed0362032899ff9fac27d26763553d15813c42c934f6e07274c9398c","abstract_canon_sha256":"e107e13651404e94eae5d12c7e083470d09dac7978c012369e39c8927b4ec367"},"schema_version":"1.0"},"canonical_sha256":"9dc1fb82569e136ac983caeb0156c79920a0c1cf5b2d10e6e2cfada985e5d478","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:14.154368Z","signature_b64":"n4KSdimmxw//JnOxg98iSbywg41rExhD0WYoSMXtPNp3chReulGb+qHSEwXhjpumte+2WILBb51kQWAChrPEAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9dc1fb82569e136ac983caeb0156c79920a0c1cf5b2d10e6e2cfada985e5d478","last_reissued_at":"2026-05-17T23:38:14.153571Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:14.153571Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2305.17926","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Xb1WUXHsiZzSaS1td15Sf+ZW6k9yYo4nNe6P//hRbHMbYcteQyR1STi41IUH0KpA0HjzA+hob24bIFiMd8jcAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T10:30:07.016803Z"},"content_sha256":"58166d118f47049eb9a1d3179c6b9594d560d8da2e55b12ca6be2a3f513979a8","schema_version":"1.0","event_id":"sha256:58166d118f47049eb9a1d3179c6b9594d560d8da2e55b12ca6be2a3f513979a8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:TXA7XASWTYJWVSMDZLVQCVWHTE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Large Language Models are not Fair Evaluators","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Large language models used as evaluators favor responses according to their order in the prompt.","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CL","authors_text":"Binghuai Lin, Dawei Zhu, Lei Li, Liang Chen, Peiyi Wang, Qi Liu, Tianyu Liu, Yunbo Cao, Zefan Cai, Zhifang Sui","submitted_at":"2023-05-29T07:41:03Z","abstract_excerpt":"In this paper, we uncover a systematic bias in the evaluation paradigm of adopting large language models~(LLMs), e.g., GPT-4, as a referee to score and compare the quality of responses generated by candidate models. We find that the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator. To address this issue, we propos"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That human annotations collected on the Vicuna benchmark questions constitute a stable and unbiased ground truth against which LLM judgments can be calibrated.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"LLMs show strong position bias when scoring model outputs, allowing easy manipulation of rankings, but calibration with multiple evidence, position balancing, and selective human input reduces this bias to better match human judgments.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models used as evaluators favor responses according to their order in the prompt.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"e8fe65c4f96d4c05160237347496aeaa0d28cd7376949bd6ca18235d3e7000e4"},"source":{"id":"2305.17926","kind":"arxiv","version":2},"verdict":{"id":"e5367dc8-1fb8-402d-a38f-16ce3a1be36a","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T12:05:24.426089Z","strongest_claim":"the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator.","one_line_summary":"LLMs show strong position bias when scoring model outputs, allowing easy manipulation of rankings, but calibration with multiple evidence, position balancing, and selective human input reduces this bias to better match human judgments.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That human annotations collected on the Vicuna benchmark questions constitute a stable and unbiased ground truth against which LLM judgments can be calibrated.","pith_extraction_headline":"Large language models used as evaluators favor responses according to their order in the prompt."},"references":{"count":72,"sample":[{"doi":"","year":2019,"title":"Belinkov, Y.; Poliak, A.; Shieber, S.; Van Durme, B.; and Rush, A. 2019. Don ' t Take the Premise for Granted: Mitigating Artifacts in Natural Language Inference. In Proceedings of the 57th Annual Mee","work_id":"099acdd3-ec45-46fa-8a38-bc935d292cc0","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Brown, T. B.; Mann, B.; Ryder, N.; Subbiah, M.; Kaplan, J.; Dhariwal, P.; Neelakantan, A.; Shyam, P.; Sastry, G.; Askell, A.; Agarwal, S.; Herbert - Voss, A.; Krueger, G.; Henighan, T.; Child, R.; Ram","work_id":"7482ffb4-572e-4267-8c72-e5a5ff5eb542","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Cai, Z.; Tu, L.; and Gimpel, K. 2017. Pay Attention to the Ending:Strong Neural Baselines for the ROC Story Cloze Task. In Proceedings of the 55th Annual Meeting of the Association for Computational L","work_id":"ea44d084-0b7c-41a6-9fc3-9c354af34f7a","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","ref_index":6,"cited_arxiv_id":"2204.02311","is_internal_anchor":true},{"doi":"","year":2023,"title":"LLaMA-Adapter V2: Parameter-Efficient Visual Instruction Model","work_id":"0fe2cfd8-d442-4ceb-b1a9-a465704f39b2","ref_index":9,"cited_arxiv_id":"2304.15010","is_internal_anchor":true}],"resolved_work":72,"snapshot_sha256":"40b18502bd3aa18145e3d4ed24d5660671bf6c58b05ec29b63d4e06b22e95d5a","internal_anchors":10},"formal_canon":{"evidence_count":2,"snapshot_sha256":"310b8c173c245eed116c79825b9fc1cc714eca83ec05998962494fd6024e87be"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"e5367dc8-1fb8-402d-a38f-16ce3a1be36a"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7mtlEYRehTzjAUZ5PUrBIbw2vZ4e/L20Ctxhtf+g2qFtYnlnVwTGqH2otPi47wHHpTvvY3dWVtSrS3Dx5SuoBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T10:30:07.017349Z"},"content_sha256":"eb6325a93c13491df82b91e399887d0274c97a2bfd4b7d9e3541b27cd317bbd1","schema_version":"1.0","event_id":"sha256:eb6325a93c13491df82b91e399887d0274c97a2bfd4b7d9e3541b27cd317bbd1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/bundle.json","state_url":"https://pith.science/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T10:30:07Z","links":{"resolver":"https://pith.science/pith/TXA7XASWTYJWVSMDZLVQCVWHTE","bundle":"https://pith.science/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/bundle.json","state":"https://pith.science/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/TXA7XASWTYJWVSMDZLVQCVWHTE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:TXA7XASWTYJWVSMDZLVQCVWHTE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e107e13651404e94eae5d12c7e083470d09dac7978c012369e39c8927b4ec367","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-05-29T07:41:03Z","title_canon_sha256":"0ba7c25aed0362032899ff9fac27d26763553d15813c42c934f6e07274c9398c"},"schema_version":"1.0","source":{"id":"2305.17926","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2305.17926","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2305.17926v2","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2305.17926","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"TXA7XASWTYJW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"TXA7XASWTYJWVSMD","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"TXA7XASW","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:eb6325a93c13491df82b91e399887d0274c97a2bfd4b7d9e3541b27cd317bbd1","target":"graph","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That human annotations collected on the Vicuna benchmark questions constitute a stable and unbiased ground truth against which LLM judgments can be calibrated."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"LLMs show strong position bias when scoring model outputs, allowing easy manipulation of rankings, but calibration with multiple evidence, position balancing, and selective human input reduces this bias to better match human judgments."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models used as evaluators favor responses according to their order in the prompt."}],"snapshot_sha256":"e8fe65c4f96d4c05160237347496aeaa0d28cd7376949bd6ca18235d3e7000e4"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"310b8c173c245eed116c79825b9fc1cc714eca83ec05998962494fd6024e87be"},"paper":{"abstract_excerpt":"In this paper, we uncover a systematic bias in the evaluation paradigm of adopting large language models~(LLMs), e.g., GPT-4, as a referee to score and compare the quality of responses generated by candidate models. We find that the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator. To address this issue, we propos","authors_text":"Binghuai Lin, Dawei Zhu, Lei Li, Liang Chen, Peiyi Wang, Qi Liu, Tianyu Liu, Yunbo Cao, Zefan Cai, Zhifang Sui","cross_cats":["cs.AI","cs.IR"],"headline":"Large language models used as evaluators favor responses according to their order in the prompt.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-05-29T07:41:03Z","title":"Large Language Models are not Fair Evaluators"},"references":{"count":72,"internal_anchors":10,"resolved_work":72,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Belinkov, Y.; Poliak, A.; Shieber, S.; Van Durme, B.; and Rush, A. 2019. Don ' t Take the Premise for Granted: Mitigating Artifacts in Natural Language Inference. In Proceedings of the 57th Annual Mee","work_id":"099acdd3-ec45-46fa-8a38-bc935d292cc0","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Brown, T. B.; Mann, B.; Ryder, N.; Subbiah, M.; Kaplan, J.; Dhariwal, P.; Neelakantan, A.; Shyam, P.; Sastry, G.; Askell, A.; Agarwal, S.; Herbert - Voss, A.; Krueger, G.; Henighan, T.; Child, R.; Ram","work_id":"7482ffb4-572e-4267-8c72-e5a5ff5eb542","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Cai, Z.; Tu, L.; and Gimpel, K. 2017. Pay Attention to the Ending:Strong Neural Baselines for the ROC Story Cloze Task. In Proceedings of the 55th Annual Meeting of the Association for Computational L","work_id":"ea44d084-0b7c-41a6-9fc3-9c354af34f7a","year":2017},{"cited_arxiv_id":"2204.02311","doi":"","is_internal_anchor":true,"ref_index":6,"title":"PaLM: Scaling Language Modeling with Pathways","work_id":"a94f3ef7-2c49-4445-93fe-6ec16aafd966","year":2022},{"cited_arxiv_id":"2304.15010","doi":"","is_internal_anchor":true,"ref_index":9,"title":"LLaMA-Adapter V2: Parameter-Efficient Visual Instruction Model","work_id":"0fe2cfd8-d442-4ceb-b1a9-a465704f39b2","year":2023}],"snapshot_sha256":"40b18502bd3aa18145e3d4ed24d5660671bf6c58b05ec29b63d4e06b22e95d5a"},"source":{"id":"2305.17926","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T12:05:24.426089Z","id":"e5367dc8-1fb8-402d-a38f-16ce3a1be36a","model_set":{"reader":"grok-4.3"},"one_line_summary":"LLMs show strong position bias when scoring model outputs, allowing easy manipulation of rankings, but calibration with multiple evidence, position balancing, and selective human input reduces this bias to better match human judgments.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models used as evaluators favor responses according to their order in the prompt.","strongest_claim":"the quality ranking of candidate responses can be easily hacked by simply altering their order of appearance in the context. This manipulation allows us to skew the evaluation result, making one model appear considerably superior to the other, e.g., Vicuna-13B could beat ChatGPT on 66 over 80 tested queries with ChatGPT as an evaluator.","weakest_assumption":"That human annotations collected on the Vicuna benchmark questions constitute a stable and unbiased ground truth against which LLM judgments can be calibrated."}},"verdict_id":"e5367dc8-1fb8-402d-a38f-16ce3a1be36a"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:58166d118f47049eb9a1d3179c6b9594d560d8da2e55b12ca6be2a3f513979a8","target":"record","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e107e13651404e94eae5d12c7e083470d09dac7978c012369e39c8927b4ec367","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2023-05-29T07:41:03Z","title_canon_sha256":"0ba7c25aed0362032899ff9fac27d26763553d15813c42c934f6e07274c9398c"},"schema_version":"1.0","source":{"id":"2305.17926","kind":"arxiv","version":2}},"canonical_sha256":"9dc1fb82569e136ac983caeb0156c79920a0c1cf5b2d10e6e2cfada985e5d478","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9dc1fb82569e136ac983caeb0156c79920a0c1cf5b2d10e6e2cfada985e5d478","first_computed_at":"2026-05-17T23:38:14.153571Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:14.153571Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n4KSdimmxw//JnOxg98iSbywg41rExhD0WYoSMXtPNp3chReulGb+qHSEwXhjpumte+2WILBb51kQWAChrPEAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:14.154368Z","signed_message":"canonical_sha256_bytes"},"source_id":"2305.17926","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:58166d118f47049eb9a1d3179c6b9594d560d8da2e55b12ca6be2a3f513979a8","sha256:eb6325a93c13491df82b91e399887d0274c97a2bfd4b7d9e3541b27cd317bbd1"],"state_sha256":"8590e10b46b3b8f267067a7d46e4b944501941c2e8ddf864ef8615d263ccfc7a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5Aw0d6hhQWq3yw9xKw3InCJIJtGL8yv28Xpqa+1nlpKl8JmFB9C2WZGXGaB3or8oEPQcD08kmX6h2i+R0WJgCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T10:30:07.020222Z","bundle_sha256":"b72cb1f605c8218e47f54b91b40867afb64c39e1818099c6652e8eadbf787ace"}}