{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:5CYHBLTGNBUTKIN4Q5JWUHMJOQ","short_pith_number":"pith:5CYHBLTG","canonical_record":{"source":{"id":"2603.03538","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-03T21:50:14Z","cross_cats_sorted":[],"title_canon_sha256":"9f081ca9c91bfcdd4ede8dfe92fe1b6f20dbf71aa4aceb8593b1f20270a0d9a2","abstract_canon_sha256":"9534669cf96ec8e3ec35b9effd02fa96eebd71a1e520fbf9f63a991e5469dc41"},"schema_version":"1.0"},"canonical_sha256":"e8b070ae6668693521bc87536a1d89743b621bc0ce393524b393dc0a76890452","source":{"kind":"arxiv","id":"2603.03538","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.03538","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"arxiv_version","alias_value":"2603.03538v3","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.03538","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_12","alias_value":"5CYHBLTGNBUT","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_16","alias_value":"5CYHBLTGNBUTKIN4","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_8","alias_value":"5CYHBLTG","created_at":"2026-05-20T00:02:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:5CYHBLTGNBUTKIN4Q5JWUHMJOQ","target":"record","payload":{"canonical_record":{"source":{"id":"2603.03538","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-03T21:50:14Z","cross_cats_sorted":[],"title_canon_sha256":"9f081ca9c91bfcdd4ede8dfe92fe1b6f20dbf71aa4aceb8593b1f20270a0d9a2","abstract_canon_sha256":"9534669cf96ec8e3ec35b9effd02fa96eebd71a1e520fbf9f63a991e5469dc41"},"schema_version":"1.0"},"canonical_sha256":"e8b070ae6668693521bc87536a1d89743b621bc0ce393524b393dc0a76890452","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:10.378875Z","signature_b64":"mTxeJhi5ee5g7dgipDgVZVQxAuVastlCMyKJ3EwU4KPDPPBQRrhcHHKtEw01tYIl+tPQPM4Mxzdp8MC6mR6LBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8b070ae6668693521bc87536a1d89743b621bc0ce393524b393dc0a76890452","last_reissued_at":"2026-05-20T00:02:10.378087Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:10.378087Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.03538","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Hd+V6JGZ6TUmDUPQ4Npz2gK2+IOgnrAOOoAqoInPlst9aTJX9AHFeB6Rfy+HZne+ZaVPXRbum65Qom9o3scoBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:13:08.975817Z"},"content_sha256":"b14d12930208a7db4e1a576a5728849e5cd4d9dce4d67849f32ab151e01fe23c","schema_version":"1.0","event_id":"sha256:b14d12930208a7db4e1a576a5728849e5cd4d9dce4d67849f32ab151e01fe23c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:5CYHBLTGNBUTKIN4Q5JWUHMJOQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Online Learnability of Chain-of-Thought Verifiers: Soundness and Completeness Trade-offs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Avrim Blum, Dravyansh Sharma, Kiriaki Fragkia, Maria-Florina Balcan, Zhiyuan Li","submitted_at":"2026-03-03T21:50:14Z","abstract_excerpt":"Large Language Models (LLMs) with chain-of-thought generation have demonstrated great potential for solving complex reasoning and planning tasks. However, the output of current LLMs is not fully reliable and needs careful verification. Even if LLMs get more accurate over time, learned verifiers can help increase trust, enforce safety constraints, and ensure alignment with personal preferences. A major challenge in learning verifiers, however, especially when their output will be used by the generator to improve its reasoning, is that the feedback loop between generator and verifier may produce"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We introduce novel extensions of the Littlestone dimension which tightly characterize the mistake bounds for learning a verifier in the realizable setting. We provide optimal algorithms for finding the Pareto-frontier (the smallest total number of mistakes given a budget of soundness mistakes) as well as for minimizing a linear combination of asymmetric costs.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"With the mild assumption that one of the generators can generate the next reasoning step correctly with some minimal probability, we show how to learn a strong generator with small error and abstention rates.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"The paper shows that chain-of-thought verifiers are online learnable via novel extensions of the Littlestone dimension that characterize soundness and completeness mistake bounds, with algorithms for Pareto-optimal trade-offs and boosting weak generators under a mild assumption.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"e22ff4eb8788df3dcdaa1fc8d90c7d2e5f5755c4e20d007c286e303efb38199d"},"source":{"id":"2603.03538","kind":"arxiv","version":3},"verdict":{"id":"b37fbca2-189a-42fd-9384-ce411a9b30fa","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T16:13:58.700762Z","strongest_claim":"We introduce novel extensions of the Littlestone dimension which tightly characterize the mistake bounds for learning a verifier in the realizable setting. We provide optimal algorithms for finding the Pareto-frontier (the smallest total number of mistakes given a budget of soundness mistakes) as well as for minimizing a linear combination of asymmetric costs.","one_line_summary":"The paper shows that chain-of-thought verifiers are online learnable via novel extensions of the Littlestone dimension that characterize soundness and completeness mistake bounds, with algorithms for Pareto-optimal trade-offs and boosting weak generators under a mild assumption.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"With the mild assumption that one of the generators can generate the next reasoning step correctly with some minimal probability, we show how to learn a strong generator with small error and abstention rates.","pith_extraction_headline":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.03538/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"b37fbca2-189a-42fd-9384-ce411a9b30fa"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DySn90He04KiD3+ByBgMsG0hjRbEAxg4Ao6w2TyjZBn4B4CqHh2/UnCKJkXCu/j/PaNQT8YIoEmQxjRogydEBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:13:08.976833Z"},"content_sha256":"667e6ae3564947ae6ad55a5121fad5e95c6cd2a8816fd8ca008f5f03107e59f9","schema_version":"1.0","event_id":"sha256:667e6ae3564947ae6ad55a5121fad5e95c6cd2a8816fd8ca008f5f03107e59f9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/bundle.json","state_url":"https://pith.science/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T21:13:08Z","links":{"resolver":"https://pith.science/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ","bundle":"https://pith.science/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/bundle.json","state":"https://pith.science/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5CYHBLTGNBUTKIN4Q5JWUHMJOQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5CYHBLTGNBUTKIN4Q5JWUHMJOQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9534669cf96ec8e3ec35b9effd02fa96eebd71a1e520fbf9f63a991e5469dc41","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-03T21:50:14Z","title_canon_sha256":"9f081ca9c91bfcdd4ede8dfe92fe1b6f20dbf71aa4aceb8593b1f20270a0d9a2"},"schema_version":"1.0","source":{"id":"2603.03538","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.03538","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"arxiv_version","alias_value":"2603.03538v3","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.03538","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_12","alias_value":"5CYHBLTGNBUT","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_16","alias_value":"5CYHBLTGNBUTKIN4","created_at":"2026-05-20T00:02:10Z"},{"alias_kind":"pith_short_8","alias_value":"5CYHBLTG","created_at":"2026-05-20T00:02:10Z"}],"graph_snapshots":[{"event_id":"sha256:667e6ae3564947ae6ad55a5121fad5e95c6cd2a8816fd8ca008f5f03107e59f9","target":"graph","created_at":"2026-05-20T00:02:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We introduce novel extensions of the Littlestone dimension which tightly characterize the mistake bounds for learning a verifier in the realizable setting. We provide optimal algorithms for finding the Pareto-frontier (the smallest total number of mistakes given a budget of soundness mistakes) as well as for minimizing a linear combination of asymmetric costs."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"With the mild assumption that one of the generators can generate the next reasoning step correctly with some minimal probability, we show how to learn a strong generator with small error and abstention rates."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"The paper shows that chain-of-thought verifiers are online learnable via novel extensions of the Littlestone dimension that characterize soundness and completeness mistake bounds, with algorithms for Pareto-optimal trade-offs and boosting weak generators under a mild assumption."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs."}],"snapshot_sha256":"e22ff4eb8788df3dcdaa1fc8d90c7d2e5f5755c4e20d007c286e303efb38199d"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.03538/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large Language Models (LLMs) with chain-of-thought generation have demonstrated great potential for solving complex reasoning and planning tasks. However, the output of current LLMs is not fully reliable and needs careful verification. Even if LLMs get more accurate over time, learned verifiers can help increase trust, enforce safety constraints, and ensure alignment with personal preferences. A major challenge in learning verifiers, however, especially when their output will be used by the generator to improve its reasoning, is that the feedback loop between generator and verifier may produce","authors_text":"Avrim Blum, Dravyansh Sharma, Kiriaki Fragkia, Maria-Florina Balcan, Zhiyuan Li","cross_cats":[],"headline":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-03T21:50:14Z","title":"Online Learnability of Chain-of-Thought Verifiers: Soundness and Completeness Trade-offs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.03538","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-15T16:13:58.700762Z","id":"b37fbca2-189a-42fd-9384-ce411a9b30fa","model_set":{"reader":"grok-4.3"},"one_line_summary":"The paper shows that chain-of-thought verifiers are online learnable via novel extensions of the Littlestone dimension that characterize soundness and completeness mistake bounds, with algorithms for Pareto-optimal trade-offs and boosting weak generators under a mild assumption.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Extensions of the Littlestone dimension tightly characterize the online mistake bounds for learning chain-of-thought verifiers under asymmetric soundness and completeness costs.","strongest_claim":"We introduce novel extensions of the Littlestone dimension which tightly characterize the mistake bounds for learning a verifier in the realizable setting. We provide optimal algorithms for finding the Pareto-frontier (the smallest total number of mistakes given a budget of soundness mistakes) as well as for minimizing a linear combination of asymmetric costs.","weakest_assumption":"With the mild assumption that one of the generators can generate the next reasoning step correctly with some minimal probability, we show how to learn a strong generator with small error and abstention rates."}},"verdict_id":"b37fbca2-189a-42fd-9384-ce411a9b30fa"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b14d12930208a7db4e1a576a5728849e5cd4d9dce4d67849f32ab151e01fe23c","target":"record","created_at":"2026-05-20T00:02:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9534669cf96ec8e3ec35b9effd02fa96eebd71a1e520fbf9f63a991e5469dc41","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-03-03T21:50:14Z","title_canon_sha256":"9f081ca9c91bfcdd4ede8dfe92fe1b6f20dbf71aa4aceb8593b1f20270a0d9a2"},"schema_version":"1.0","source":{"id":"2603.03538","kind":"arxiv","version":3}},"canonical_sha256":"e8b070ae6668693521bc87536a1d89743b621bc0ce393524b393dc0a76890452","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e8b070ae6668693521bc87536a1d89743b621bc0ce393524b393dc0a76890452","first_computed_at":"2026-05-20T00:02:10.378087Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:10.378087Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mTxeJhi5ee5g7dgipDgVZVQxAuVastlCMyKJ3EwU4KPDPPBQRrhcHHKtEw01tYIl+tPQPM4Mxzdp8MC6mR6LBA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:10.378875Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.03538","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b14d12930208a7db4e1a576a5728849e5cd4d9dce4d67849f32ab151e01fe23c","sha256:667e6ae3564947ae6ad55a5121fad5e95c6cd2a8816fd8ca008f5f03107e59f9"],"state_sha256":"aba7affc8cd28bc4284411df7b8117c9f5727b3fa778eee1ea5f4c2f568621c3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Aa7Q9gAbFHm7HX05+0s0Zz1dfi68HWaaANRHKIoRcaQBLc269y0cbE5m2DbMMpOeNYl7Hrg8I/7IB1VcJ24mAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T21:13:08.981138Z","bundle_sha256":"b96562bc4dd896f57b0368830748203fdb3c51f1189a12ce14a791e1795e0234"}}