{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QO52G673TFPNFV7UWU2U5GQYSQ","short_pith_number":"pith:QO52G673","canonical_record":{"source":{"id":"2605.23055","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:38:34Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"8c720f0f0faaea13fe0bc64db49ef8f2316eea4c528993c1412942dacfbabe4a","abstract_canon_sha256":"781baf9442ed16e68620ed8e37926995eea32dab0369d62405df47b41c985f17"},"schema_version":"1.0"},"canonical_sha256":"83bba37bfb995ed2d7f4b5354e9a18940d15b01c479371edfb5d577dfc41b2e9","source":{"kind":"arxiv","id":"2605.23055","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23055","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23055v1","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23055","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_12","alias_value":"QO52G673TFPN","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_16","alias_value":"QO52G673TFPNFV7U","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_8","alias_value":"QO52G673","created_at":"2026-05-25T02:01:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QO52G673TFPNFV7UWU2U5GQYSQ","target":"record","payload":{"canonical_record":{"source":{"id":"2605.23055","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:38:34Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"8c720f0f0faaea13fe0bc64db49ef8f2316eea4c528993c1412942dacfbabe4a","abstract_canon_sha256":"781baf9442ed16e68620ed8e37926995eea32dab0369d62405df47b41c985f17"},"schema_version":"1.0"},"canonical_sha256":"83bba37bfb995ed2d7f4b5354e9a18940d15b01c479371edfb5d577dfc41b2e9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:36.446526Z","signature_b64":"bvS4Wj+q3DY0C0UX5IuWJ+pQrd7ZUUtOsBo0mN/GWYYsOBLylNWlX8nnRnWZ7T6gHithZMMAwe4ZdaHwWKBABg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"83bba37bfb995ed2d7f4b5354e9a18940d15b01c479371edfb5d577dfc41b2e9","last_reissued_at":"2026-05-25T02:01:36.445803Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:36.445803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.23055","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y/Ua3t0e8LSAxsOtQsNiXCnmhpY4+ybYXEbLmCp/k6SEVib+6QUogEwVTcxLot0c8Xjw7Uc6/ORJznYS+e5YCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T19:35:20.578339Z"},"content_sha256":"fcd02f4d5c6122712c46e65f3f539e586a6489af795b24cf3295348c75ce3fbb","schema_version":"1.0","event_id":"sha256:fcd02f4d5c6122712c46e65f3f539e586a6489af795b24cf3295348c75ce3fbb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QO52G673TFPNFV7UWU2U5GQYSQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Decomposing and Measuring Evaluation Awareness","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Changling Li, Jie Zhang, Maksym Andriushchenko, Sahar Abdelnabi, Terry Jingchen Zhang, Zhijing Jin","submitted_at":"2026-05-21T21:38:34Z","abstract_excerpt":"Frontier language models sometimes recognize that they are being evaluated and adjust their behavior, undermining validity of benchmark results. Yet the field studies it without a shared foundation, conflating properties of the evaluation with properties of the model, and detection with behavioral response. We ground evaluation awareness in social psychology, decomposing it into an environment component (how recognizable the task is) and a model component that separates recognition from propensity to act on it. We operationalize the environment component through eight categorized trigger facto"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23055","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23055/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FqWqbwTDqljWCnuF5lmk6/4H4Qxj8R03Y3HOHrP3ebAUBka4U1frL9ADqBUn7lVmXTnQjVKXKsfBDuRcpot5DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T19:35:20.578718Z"},"content_sha256":"21450f141a41c842c71f02f3690c33f42776297836200dedd9da880882c548b7","schema_version":"1.0","event_id":"sha256:21450f141a41c842c71f02f3690c33f42776297836200dedd9da880882c548b7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QO52G673TFPNFV7UWU2U5GQYSQ/bundle.json","state_url":"https://pith.science/pith/QO52G673TFPNFV7UWU2U5GQYSQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QO52G673TFPNFV7UWU2U5GQYSQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T19:35:20Z","links":{"resolver":"https://pith.science/pith/QO52G673TFPNFV7UWU2U5GQYSQ","bundle":"https://pith.science/pith/QO52G673TFPNFV7UWU2U5GQYSQ/bundle.json","state":"https://pith.science/pith/QO52G673TFPNFV7UWU2U5GQYSQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QO52G673TFPNFV7UWU2U5GQYSQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QO52G673TFPNFV7UWU2U5GQYSQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"781baf9442ed16e68620ed8e37926995eea32dab0369d62405df47b41c985f17","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:38:34Z","title_canon_sha256":"8c720f0f0faaea13fe0bc64db49ef8f2316eea4c528993c1412942dacfbabe4a"},"schema_version":"1.0","source":{"id":"2605.23055","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23055","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23055v1","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23055","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_12","alias_value":"QO52G673TFPN","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_16","alias_value":"QO52G673TFPNFV7U","created_at":"2026-05-25T02:01:36Z"},{"alias_kind":"pith_short_8","alias_value":"QO52G673","created_at":"2026-05-25T02:01:36Z"}],"graph_snapshots":[{"event_id":"sha256:21450f141a41c842c71f02f3690c33f42776297836200dedd9da880882c548b7","target":"graph","created_at":"2026-05-25T02:01:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.23055/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Frontier language models sometimes recognize that they are being evaluated and adjust their behavior, undermining validity of benchmark results. Yet the field studies it without a shared foundation, conflating properties of the evaluation with properties of the model, and detection with behavioral response. We ground evaluation awareness in social psychology, decomposing it into an environment component (how recognizable the task is) and a model component that separates recognition from propensity to act on it. We operationalize the environment component through eight categorized trigger facto","authors_text":"Changling Li, Jie Zhang, Maksym Andriushchenko, Sahar Abdelnabi, Terry Jingchen Zhang, Zhijing Jin","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:38:34Z","title":"Decomposing and Measuring Evaluation Awareness"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23055","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fcd02f4d5c6122712c46e65f3f539e586a6489af795b24cf3295348c75ce3fbb","target":"record","created_at":"2026-05-25T02:01:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"781baf9442ed16e68620ed8e37926995eea32dab0369d62405df47b41c985f17","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-21T21:38:34Z","title_canon_sha256":"8c720f0f0faaea13fe0bc64db49ef8f2316eea4c528993c1412942dacfbabe4a"},"schema_version":"1.0","source":{"id":"2605.23055","kind":"arxiv","version":1}},"canonical_sha256":"83bba37bfb995ed2d7f4b5354e9a18940d15b01c479371edfb5d577dfc41b2e9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"83bba37bfb995ed2d7f4b5354e9a18940d15b01c479371edfb5d577dfc41b2e9","first_computed_at":"2026-05-25T02:01:36.445803Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:36.445803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bvS4Wj+q3DY0C0UX5IuWJ+pQrd7ZUUtOsBo0mN/GWYYsOBLylNWlX8nnRnWZ7T6gHithZMMAwe4ZdaHwWKBABg==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:36.446526Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.23055","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fcd02f4d5c6122712c46e65f3f539e586a6489af795b24cf3295348c75ce3fbb","sha256:21450f141a41c842c71f02f3690c33f42776297836200dedd9da880882c548b7"],"state_sha256":"8120d1ac9311e7935cd2aa6b10ed66bfef89249fa85939b0c6f2600021062a32"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J8kOBLzvuOBqhi8/9l7m/KRP4aZgA9cdaI4YBaIzukdmCy+EZ7E/8aosQAIGzH3UhkSuWrPSwTKRfsE9qfKYBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T19:35:20.580643Z","bundle_sha256":"65eca5f3a5274335f7d5bf782a7f69674a296022bf6e73e1953acb8311eb6cb6"}}