{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2O2ZG4FA6ODRRY5INPUYRK6KXQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2f13b854324551c9a6afe678c4a6c9cba929492ecf0a9fb74c8de7b95d2a1d30","cross_cats_sorted":["cs.AI","econ.EM","math.ST","stat.TH"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-15T18:06:20Z","title_canon_sha256":"7b5d70fd28bbc07818df7c0d5b9a9e719603f9fdeb87587c62b0a0974ab28dc8"},"schema_version":"1.0","source":{"id":"2606.17165","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.17165","created_at":"2026-06-19T16:12:57Z"},{"alias_kind":"arxiv_version","alias_value":"2606.17165v2","created_at":"2026-06-19T16:12:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17165","created_at":"2026-06-19T16:12:57Z"},{"alias_kind":"pith_short_12","alias_value":"2O2ZG4FA6ODR","created_at":"2026-06-19T16:12:57Z"},{"alias_kind":"pith_short_16","alias_value":"2O2ZG4FA6ODRRY5I","created_at":"2026-06-19T16:12:57Z"},{"alias_kind":"pith_short_8","alias_value":"2O2ZG4FA","created_at":"2026-06-19T16:12:57Z"}],"graph_snapshots":[{"event_id":"sha256:4880245423da2c992b244cec999c589fb251185ccbdf1bf2ceb1db0172529580","target":"graph","created_at":"2026-06-19T16:12:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.17165/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Organizations and researchers show increasing interest in using large language models (LLMs) in place of human participants in A/B tests, in the hope of experimenting faster and at lower cost. We study when a treatment effect estimated on LLM outcomes can recover the effect that would have been measured on the human population of interest. Distributional equivalence between LLM and human outcomes would make any standard estimator valid but is unrealistic. We therefore develop a statistical framework that adapts surrogate endpoint theory to LLMs, showing that calibrating LLM outcomes to human o","authors_text":"Joel Persson, M{\\aa}rten Schultzberg, Sebastian Ankargren","cross_cats":["cs.AI","econ.EM","math.ST","stat.TH"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-15T18:06:20Z","title":"Statistical Foundations of LLM-based A/B Testing: A Surrogacy Framework for Human Causal Inference"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17165","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d62bc85b52009611546d3eb3964fc7bcd9d39af9d26aaba8e24554133c13d215","target":"record","created_at":"2026-06-19T16:12:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2f13b854324551c9a6afe678c4a6c9cba929492ecf0a9fb74c8de7b95d2a1d30","cross_cats_sorted":["cs.AI","econ.EM","math.ST","stat.TH"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2026-06-15T18:06:20Z","title_canon_sha256":"7b5d70fd28bbc07818df7c0d5b9a9e719603f9fdeb87587c62b0a0974ab28dc8"},"schema_version":"1.0","source":{"id":"2606.17165","kind":"arxiv","version":2}},"canonical_sha256":"d3b59370a0f38718e3a86be988abcabc3b24bae05d24ee60d2b807087ed1c699","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d3b59370a0f38718e3a86be988abcabc3b24bae05d24ee60d2b807087ed1c699","first_computed_at":"2026-06-19T16:12:57.250958Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:57.250958Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XVRDE8d4zKoG70wQ3zsYiTyhxt2aFhidJZquhDbz4sr3roG3el/MM9+c5CufNBeAw58YO/BtCE/uuli1ilp0Aw==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:57.251308Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.17165","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d62bc85b52009611546d3eb3964fc7bcd9d39af9d26aaba8e24554133c13d215","sha256:4880245423da2c992b244cec999c589fb251185ccbdf1bf2ceb1db0172529580"],"state_sha256":"c855a28aff6d88baa9eb203910739f5681adb38af55e80f8066a47c636a92543"}