{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4N7ABM2UUZYZF3F36EJNP5OP3X","short_pith_number":"pith:4N7ABM2U","canonical_record":{"source":{"id":"2605.12899","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T02:24:57Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"7346a51bd392f998a0063714cadaeae5b23ff6c39a3e59ea82cfeb86b831ca80","abstract_canon_sha256":"ac7f3abda1c207275f824ce485f77e876d5ba85470924061ab5c3b6f26205530"},"schema_version":"1.0"},"canonical_sha256":"e37e00b354a67192ecbbf112d7f5cfddec09f483c9ddfb6999752e88d6ca355f","source":{"kind":"arxiv","id":"2605.12899","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12899","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12899v1","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12899","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"pith_short_12","alias_value":"4N7ABM2UUZYZ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4N7ABM2UUZYZF3F3","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4N7ABM2U","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4N7ABM2UUZYZF3F36EJNP5OP3X","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12899","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T02:24:57Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"7346a51bd392f998a0063714cadaeae5b23ff6c39a3e59ea82cfeb86b831ca80","abstract_canon_sha256":"ac7f3abda1c207275f824ce485f77e876d5ba85470924061ab5c3b6f26205530"},"schema_version":"1.0"},"canonical_sha256":"e37e00b354a67192ecbbf112d7f5cfddec09f483c9ddfb6999752e88d6ca355f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:10.767288Z","signature_b64":"lv2dcR//mM8rzvIU8LJsvdRnIgVC3ui33ky/MElEb38/zrxhpEyKS7MRxVQzPkAtD3bDK7wgpZvlePiCmCU7Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e37e00b354a67192ecbbf112d7f5cfddec09f483c9ddfb6999752e88d6ca355f","last_reissued_at":"2026-05-18T03:09:10.766621Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:10.766621Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12899","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tPRwcGErGJ8S5C3QTBNZaN4oJCIJsKpEHq5JOuVCKpgAOQPs3ZjaDsyKiLeolx0shMcJJzDSEKXsNZw1diWvCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T17:45:36.622121Z"},"content_sha256":"f4356c9d36339ad31156bd0fdf9d2d1304a5d467166ee6735ed2ab44291b3231","schema_version":"1.0","event_id":"sha256:f4356c9d36339ad31156bd0fdf9d2d1304a5d467166ee6735ed2ab44291b3231"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4N7ABM2UUZYZF3F36EJNP5OP3X","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Robust Sequential Experimental Design for A/B Testing","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification.","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Chengchun Shi, Hongtu Zhu, Niansheng Tang, Qianglin Wen, Ting Li, Xiangkun Wu, Yingying Zhang","submitted_at":"2026-05-13T02:24:57Z","abstract_excerpt":"Experimental design has emerged as a powerful approach for improving the sample efficiency of A/B testing, yet existing designs rely critically on correctly specified models. We study robust sequential experimental design under model misspecification and develop a unified framework that covers both contextual bandit and dynamic settings. Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect. Empirically, we demonstrate the effectiveness of the proposed approach using synthetic and real-world datasets from a leading technology company"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The unified framework assumes that model misspecification can be controlled within a single design that covers both contextual bandit and dynamic settings, but the abstract provides no details on the precise class of misspecification or the conditions required for the bound to hold.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A unified robust framework for sequential A/B testing bounds the worst-case mean squared error of treatment effect estimates under model misspecification in both contextual bandit and dynamic regimes.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a9ffbdc626b15d84ba149c5a172381eb1cf5b486606f6513b5f22f9e438a854c"},"source":{"id":"2605.12899","kind":"arxiv","version":1},"verdict":{"id":"eedbc4e2-ab54-495e-ac30-86dcbe84fe9d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:11:48.650031Z","strongest_claim":"Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect.","one_line_summary":"A unified robust framework for sequential A/B testing bounds the worst-case mean squared error of treatment effect estimates under model misspecification in both contextual bandit and dynamic regimes.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The unified framework assumes that model misspecification can be controlled within a single design that covers both contextual bandit and dynamic settings, but the abstract provides no details on the precise class of misspecification or the conditions required for the bound to hold.","pith_extraction_headline":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification."},"references":{"count":25,"sample":[{"doi":"10.1093/jrsssb/qkad072","year":null,"title":"doi: 10.1093/jrsssb/qkad072. Atkinson, A. C. Optimum biased coin designs for sequential clinical trials with prognostic factors.Biometrika, 69(1): 61–67,","work_id":"64d7c3b7-a1a9-49d2-af4e-d497ec034a8f","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"arXiv preprint arXiv:2112.13495 , year=","work_id":"c85a376e-42ba-4630-9ad4-5f6bfa6fbd4c","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1287/mnsc.2019.3424","year":2019,"title":"URL https: //doi.org/10.1287/mnsc.2019.3424","work_id":"402d6e31-7986-4385-b1c4-841c353ecd04","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Budgeted active experimen- tation for treatment effect estimation from observational and randomized data.arXiv preprint arXiv:2602.22021,","work_id":"947bac34-41ac-421f-b632-9b25bdb97a5e","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Detecting interference in A/B testing with increasing allocation.arXiv preprint arXiv:2211.03262,","work_id":"a0ef6531-c11f-47c8-9ddc-c06b5352ad9c","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":25,"snapshot_sha256":"07d8d65218bb8c3342338ce1a5e9cad828494e815e6f9fe9c8d06e0ae29a5667","internal_anchors":1},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"eedbc4e2-ab54-495e-ac30-86dcbe84fe9d"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bPLxtVE6DIY5FREytMiIQG/gOdy4i0gVdw+eOT9I2UKv678lUFXWh/d1j9YqwJaV1aNnn78EEJMrcuUEtdGeAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T17:45:36.623249Z"},"content_sha256":"d048f5aa60463aabee915be47d5b2c568c8ffece069bbca2bedbd8eb9822dc89","schema_version":"1.0","event_id":"sha256:d048f5aa60463aabee915be47d5b2c568c8ffece069bbca2bedbd8eb9822dc89"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/bundle.json","state_url":"https://pith.science/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T17:45:36Z","links":{"resolver":"https://pith.science/pith/4N7ABM2UUZYZF3F36EJNP5OP3X","bundle":"https://pith.science/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/bundle.json","state":"https://pith.science/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4N7ABM2UUZYZF3F36EJNP5OP3X/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4N7ABM2UUZYZF3F36EJNP5OP3X","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ac7f3abda1c207275f824ce485f77e876d5ba85470924061ab5c3b6f26205530","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T02:24:57Z","title_canon_sha256":"7346a51bd392f998a0063714cadaeae5b23ff6c39a3e59ea82cfeb86b831ca80"},"schema_version":"1.0","source":{"id":"2605.12899","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12899","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12899v1","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12899","created_at":"2026-05-18T03:09:10Z"},{"alias_kind":"pith_short_12","alias_value":"4N7ABM2UUZYZ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"4N7ABM2UUZYZF3F3","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"4N7ABM2U","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:d048f5aa60463aabee915be47d5b2c568c8ffece069bbca2bedbd8eb9822dc89","target":"graph","created_at":"2026-05-18T03:09:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The unified framework assumes that model misspecification can be controlled within a single design that covers both contextual bandit and dynamic settings, but the abstract provides no details on the precise class of misspecification or the conditions required for the bound to hold."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A unified robust framework for sequential A/B testing bounds the worst-case mean squared error of treatment effect estimates under model misspecification in both contextual bandit and dynamic regimes."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification."}],"snapshot_sha256":"a9ffbdc626b15d84ba149c5a172381eb1cf5b486606f6513b5f22f9e438a854c"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Experimental design has emerged as a powerful approach for improving the sample efficiency of A/B testing, yet existing designs rely critically on correctly specified models. We study robust sequential experimental design under model misspecification and develop a unified framework that covers both contextual bandit and dynamic settings. Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect. Empirically, we demonstrate the effectiveness of the proposed approach using synthetic and real-world datasets from a leading technology company","authors_text":"Chengchun Shi, Hongtu Zhu, Niansheng Tang, Qianglin Wen, Ting Li, Xiangkun Wu, Yingying Zhang","cross_cats":["cs.LG"],"headline":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T02:24:57Z","title":"Robust Sequential Experimental Design for A/B Testing"},"references":{"count":25,"internal_anchors":1,"resolved_work":25,"sample":[{"cited_arxiv_id":"","doi":"10.1093/jrsssb/qkad072","is_internal_anchor":false,"ref_index":1,"title":"doi: 10.1093/jrsssb/qkad072. Atkinson, A. C. Optimum biased coin designs for sequential clinical trials with prognostic factors.Biometrika, 69(1): 61–67,","work_id":"64d7c3b7-a1a9-49d2-af4e-d497ec034a8f","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"arXiv preprint arXiv:2112.13495 , year=","work_id":"c85a376e-42ba-4630-9ad4-5f6bfa6fbd4c","year":null},{"cited_arxiv_id":"","doi":"10.1287/mnsc.2019.3424","is_internal_anchor":false,"ref_index":3,"title":"URL https: //doi.org/10.1287/mnsc.2019.3424","work_id":"402d6e31-7986-4385-b1c4-841c353ecd04","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Budgeted active experimen- tation for treatment effect estimation from observational and randomized data.arXiv preprint arXiv:2602.22021,","work_id":"947bac34-41ac-421f-b632-9b25bdb97a5e","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Detecting interference in A/B testing with increasing allocation.arXiv preprint arXiv:2211.03262,","work_id":"a0ef6531-c11f-47c8-9ddc-c06b5352ad9c","year":null}],"snapshot_sha256":"07d8d65218bb8c3342338ce1a5e9cad828494e815e6f9fe9c8d06e0ae29a5667"},"source":{"id":"2605.12899","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:11:48.650031Z","id":"eedbc4e2-ab54-495e-ac30-86dcbe84fe9d","model_set":{"reader":"grok-4.3"},"one_line_summary":"A unified robust framework for sequential A/B testing bounds the worst-case mean squared error of treatment effect estimates under model misspecification in both contextual bandit and dynamic regimes.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Robust sequential experimental design bounds the worst-case mean squared error of estimated treatment effects in A/B testing under model misspecification.","strongest_claim":"Theoretically, we prove that our design bounds the worst-case mean squared error of the estimated treatment effect.","weakest_assumption":"The unified framework assumes that model misspecification can be controlled within a single design that covers both contextual bandit and dynamic settings, but the abstract provides no details on the precise class of misspecification or the conditions required for the bound to hold."}},"verdict_id":"eedbc4e2-ab54-495e-ac30-86dcbe84fe9d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f4356c9d36339ad31156bd0fdf9d2d1304a5d467166ee6735ed2ab44291b3231","target":"record","created_at":"2026-05-18T03:09:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ac7f3abda1c207275f824ce485f77e876d5ba85470924061ab5c3b6f26205530","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-13T02:24:57Z","title_canon_sha256":"7346a51bd392f998a0063714cadaeae5b23ff6c39a3e59ea82cfeb86b831ca80"},"schema_version":"1.0","source":{"id":"2605.12899","kind":"arxiv","version":1}},"canonical_sha256":"e37e00b354a67192ecbbf112d7f5cfddec09f483c9ddfb6999752e88d6ca355f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e37e00b354a67192ecbbf112d7f5cfddec09f483c9ddfb6999752e88d6ca355f","first_computed_at":"2026-05-18T03:09:10.766621Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:10.766621Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lv2dcR//mM8rzvIU8LJsvdRnIgVC3ui33ky/MElEb38/zrxhpEyKS7MRxVQzPkAtD3bDK7wgpZvlePiCmCU7Ag==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:10.767288Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12899","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f4356c9d36339ad31156bd0fdf9d2d1304a5d467166ee6735ed2ab44291b3231","sha256:d048f5aa60463aabee915be47d5b2c568c8ffece069bbca2bedbd8eb9822dc89"],"state_sha256":"f977b6f6abc895e2692effafed1870858bd6ccb66b7dac1c98ac15fd9e70c11a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IdokX1GXJXsYB3y+YyOYtfOg3zCt2j0NxlGFlwCGc/Ti/FpI7dhamFDqQPXN8YbTA1/mtbKL0vZ+oD7qNd8zAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T17:45:36.626769Z","bundle_sha256":"e0bb8ed98ea66352a6206ec4002086c47a27d6e2ba51e6d9afb9ab3081ef3a17"}}