{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:S5UME6L36M5U6EHALQZWXJWWIJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a"},"schema_version":"1.0","source":{"id":"2606.24596","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24596v1","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24596","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_12","alias_value":"S5UME6L36M5U","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_16","alias_value":"S5UME6L36M5U6EHA","created_at":"2026-06-24T01:15:36Z"},{"alias_kind":"pith_short_8","alias_value":"S5UME6L3","created_at":"2026-06-24T01:15:36Z"}],"graph_snapshots":[{"event_id":"sha256:98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9","target":"graph","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24596/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As Large Language Models are increasingly deployed in critical applications, robustly evaluating their social biases is paramount. However, the current literature suffers from widespread methodological fragmentation, which yields contradictory conclusions. This stems largely from ignoring the structural framing of benchmark-level evaluations. To resolve this, we introduce a unified and controllable framework that standardizes heterogeneous benchmarks to systematically contrast isolated demographic assessments with forced-choice comparative settings. Crucially, this allows us to disentangle the","authors_text":"Federico Marcuzzi, Iryna Gurevych, Roy Schwartz, Xuefei Ning","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title":"To Compare, or Not to Compare: On Methodological Practices in Evaluating Social Bias"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24596","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701","target":"record","created_at":"2026-06-24T01:15:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3347d0cedd1b2c4822a2bed7164649b2f38c4c0090a9d30b3865f5e27ce6318a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T13:53:50Z","title_canon_sha256":"762e14629aec15a742ef790ecb341ca8274a17f90d3257b780c71ba6d557b56a"},"schema_version":"1.0","source":{"id":"2606.24596","kind":"arxiv","version":1}},"canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9768c2797bf33b4f10e05c336ba6d642729feebca063dbccbb433de6a49dcb28","first_computed_at":"2026-06-24T01:15:36.497720Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:36.497720Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ynnc4hqIXtiIoeg1IeH5oBeWHNzYEqbU8RjCdivJeDZ9rkV7yZjBBsSLix9CHilgpCb9kEzSy6vCwUXEuhgwCQ==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:36.498121Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24596","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f075d35e02f2d1245c5b0e2870843adb50d82b81d41f37c474132287910cb701","sha256:98289d3b6a2a35721a8786fb2589b9ca404a5f1a3e4f9292053d8df41a29cdb9"],"state_sha256":"0bb584654d3e4d6534f9fe495a49d2582583e43558873771a9d5802c470bfd89"}