{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2022:3YDXEUQSWA63WKHNHAQJUHOC3G","short_pith_number":"pith:3YDXEUQS","canonical_record":{"source":{"id":"2210.03057","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-10-06T17:03:34Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"781e2d2fc0372114d9092ec058b5b0795ebb6bd530c29b7bd58c4064d348483d","abstract_canon_sha256":"992c71f9089ceabc44e1ab244d0fc7bd5f3d12c95c1151a0918fb0675d2fa896"},"schema_version":"1.0"},"canonical_sha256":"de07725212b03dbb28ed38209a1dc2d9ad3b9bc9050282169cf2c3b6cf22949e","source":{"kind":"arxiv","id":"2210.03057","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2210.03057","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2210.03057v1","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2210.03057","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"3YDXEUQSWA63","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"3YDXEUQSWA63WKHN","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"3YDXEUQS","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2022:3YDXEUQSWA63WKHNHAQJUHOC3G","target":"record","payload":{"canonical_record":{"source":{"id":"2210.03057","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-10-06T17:03:34Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"781e2d2fc0372114d9092ec058b5b0795ebb6bd530c29b7bd58c4064d348483d","abstract_canon_sha256":"992c71f9089ceabc44e1ab244d0fc7bd5f3d12c95c1151a0918fb0675d2fa896"},"schema_version":"1.0"},"canonical_sha256":"de07725212b03dbb28ed38209a1dc2d9ad3b9bc9050282169cf2c3b6cf22949e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:50.174218Z","signature_b64":"l3PQ/aL81/CZ5IvFSLrpvIJ4RfCYxmQq2mFwmeH1AQdBnBzEYy5EvKzmtgcIXdW2ovMeTI7D4+n/aVx1ulRwDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"de07725212b03dbb28ed38209a1dc2d9ad3b9bc9050282169cf2c3b6cf22949e","last_reissued_at":"2026-05-17T23:38:50.173722Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:50.173722Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2210.03057","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vAzKBLi5oqNNxLVYavGtPMYO/lfZvw274CtGr+mxwswIPf4l7oxC/FDg3P+fBUuRv3EknwQqbBJhfvh/mdOxBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:40:26.258757Z"},"content_sha256":"20b8811c901aa2114a8f7de91060e9781bf3ba48810377f4306fa36681dc2061","schema_version":"1.0","event_id":"sha256:20b8811c901aa2114a8f7de91060e9781bf3ba48810377f4306fa36681dc2061"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2022:3YDXEUQSWA63WKHNHAQJUHOC3G","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Language Models are Multilingual Chain-of-Thought Reasoners","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Large language models gain step-by-step reasoning ability across many languages as they scale up.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Denny Zhou, Dipanjan Das, Freda Shi, Hyung Won Chung, Jason Wei, Markus Freitag, Mirac Suzgun, Sebastian Ruder, Soroush Vosoughi, Suraj Srivats, Xuezhi Wang, Yi Tay","submitted_at":"2022-10-06T17:03:34Z","abstract_excerpt":"We evaluate the reasoning abilities of large language models in multilingual settings. We introduce the Multilingual Grade School Math (MGSM) benchmark, by manually translating 250 grade-school math problems from the GSM8K dataset (Cobbe et al., 2021) into ten typologically diverse languages. We find that the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili. Finally, we show that the multilingual reasoning abil"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The manual translations preserve the original semantic meaning, logical structure, and difficulty level of the problems without introducing translation artifacts that would make the task easier or harder in non-English languages.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Large language models show strong chain-of-thought reasoning on math problems across ten languages, with abilities emerging at larger scales and extending to other reasoning tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models gain step-by-step reasoning ability across many languages as they scale up.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4ea2e0772861ba44e2f429c2c5eb49075ee60d0ff4e8aba6886d4b8fb14e8d76"},"source":{"id":"2210.03057","kind":"arxiv","version":1},"verdict":{"id":"fe781533-2a31-46f0-947a-c93b09b9a8c9","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T21:02:25.175118Z","strongest_claim":"the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili","one_line_summary":"Large language models show strong chain-of-thought reasoning on math problems across ten languages, with abilities emerging at larger scales and extending to other reasoning tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The manual translations preserve the original semantic meaning, logical structure, and difficulty level of the problems without introducing translation artifacts that would make the task easier or harder in non-English languages.","pith_extraction_headline":"Large language models gain step-by-step reasoning ability across many languages as they scale up."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"715a8b5436ec6450059d21727722178f6161cc06ae5fe91f5467fb17fbeb70f3"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"fe781533-2a31-46f0-947a-c93b09b9a8c9"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bL9LV4oCSAOkbFhjByDKSp4AhvVFaB7+EBiYqp788szY3UyTVodTk2WNz30X2hhjee1AwBg1fkgJi71OX+khAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T16:40:26.259652Z"},"content_sha256":"e46508afe5f3cb560c6cde1435016cf07e6a0389b0b58ff21dfc50a99b3c0a04","schema_version":"1.0","event_id":"sha256:e46508afe5f3cb560c6cde1435016cf07e6a0389b0b58ff21dfc50a99b3c0a04"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/bundle.json","state_url":"https://pith.science/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T16:40:26Z","links":{"resolver":"https://pith.science/pith/3YDXEUQSWA63WKHNHAQJUHOC3G","bundle":"https://pith.science/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/bundle.json","state":"https://pith.science/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3YDXEUQSWA63WKHNHAQJUHOC3G/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2022:3YDXEUQSWA63WKHNHAQJUHOC3G","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"992c71f9089ceabc44e1ab244d0fc7bd5f3d12c95c1151a0918fb0675d2fa896","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-10-06T17:03:34Z","title_canon_sha256":"781e2d2fc0372114d9092ec058b5b0795ebb6bd530c29b7bd58c4064d348483d"},"schema_version":"1.0","source":{"id":"2210.03057","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2210.03057","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"arxiv_version","alias_value":"2210.03057v1","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2210.03057","created_at":"2026-05-17T23:38:50Z"},{"alias_kind":"pith_short_12","alias_value":"3YDXEUQSWA63","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"3YDXEUQSWA63WKHN","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"3YDXEUQS","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:e46508afe5f3cb560c6cde1435016cf07e6a0389b0b58ff21dfc50a99b3c0a04","target":"graph","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The manual translations preserve the original semantic meaning, logical structure, and difficulty level of the problems without introducing translation artifacts that would make the task easier or harder in non-English languages."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Large language models show strong chain-of-thought reasoning on math problems across ten languages, with abilities emerging at larger scales and extending to other reasoning tasks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models gain step-by-step reasoning ability across many languages as they scale up."}],"snapshot_sha256":"4ea2e0772861ba44e2f429c2c5eb49075ee60d0ff4e8aba6886d4b8fb14e8d76"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"715a8b5436ec6450059d21727722178f6161cc06ae5fe91f5467fb17fbeb70f3"},"paper":{"abstract_excerpt":"We evaluate the reasoning abilities of large language models in multilingual settings. We introduce the Multilingual Grade School Math (MGSM) benchmark, by manually translating 250 grade-school math problems from the GSM8K dataset (Cobbe et al., 2021) into ten typologically diverse languages. We find that the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili. Finally, we show that the multilingual reasoning abil","authors_text":"Denny Zhou, Dipanjan Das, Freda Shi, Hyung Won Chung, Jason Wei, Markus Freitag, Mirac Suzgun, Sebastian Ruder, Soroush Vosoughi, Suraj Srivats, Xuezhi Wang, Yi Tay","cross_cats":["cs.AI","cs.LG"],"headline":"Large language models gain step-by-step reasoning ability across many languages as they scale up.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-10-06T17:03:34Z","title":"Language Models are Multilingual Chain-of-Thought Reasoners"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2210.03057","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T21:02:25.175118Z","id":"fe781533-2a31-46f0-947a-c93b09b9a8c9","model_set":{"reader":"grok-4.3"},"one_line_summary":"Large language models show strong chain-of-thought reasoning on math problems across ten languages, with abilities emerging at larger scales and extending to other reasoning tasks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models gain step-by-step reasoning ability across many languages as they scale up.","strongest_claim":"the ability to solve MGSM problems via chain-of-thought prompting emerges with increasing model scale, and that models have strikingly strong multilingual reasoning abilities, even in underrepresented languages such as Bengali and Swahili","weakest_assumption":"The manual translations preserve the original semantic meaning, logical structure, and difficulty level of the problems without introducing translation artifacts that would make the task easier or harder in non-English languages."}},"verdict_id":"fe781533-2a31-46f0-947a-c93b09b9a8c9"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20b8811c901aa2114a8f7de91060e9781bf3ba48810377f4306fa36681dc2061","target":"record","created_at":"2026-05-17T23:38:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"992c71f9089ceabc44e1ab244d0fc7bd5f3d12c95c1151a0918fb0675d2fa896","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2022-10-06T17:03:34Z","title_canon_sha256":"781e2d2fc0372114d9092ec058b5b0795ebb6bd530c29b7bd58c4064d348483d"},"schema_version":"1.0","source":{"id":"2210.03057","kind":"arxiv","version":1}},"canonical_sha256":"de07725212b03dbb28ed38209a1dc2d9ad3b9bc9050282169cf2c3b6cf22949e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"de07725212b03dbb28ed38209a1dc2d9ad3b9bc9050282169cf2c3b6cf22949e","first_computed_at":"2026-05-17T23:38:50.173722Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:50.173722Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"l3PQ/aL81/CZ5IvFSLrpvIJ4RfCYxmQq2mFwmeH1AQdBnBzEYy5EvKzmtgcIXdW2ovMeTI7D4+n/aVx1ulRwDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:50.174218Z","signed_message":"canonical_sha256_bytes"},"source_id":"2210.03057","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20b8811c901aa2114a8f7de91060e9781bf3ba48810377f4306fa36681dc2061","sha256:e46508afe5f3cb560c6cde1435016cf07e6a0389b0b58ff21dfc50a99b3c0a04"],"state_sha256":"849d44b57965b08d905f2cc9c82bef22fd8f5e7c60928cf78bd4665f6776e4bf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o1FjBjAtTDGz3JXX2Ajt1Xk4zV58SffrR9UG560ZjLMgkBB0SDPQ+5frU9nH90drt0b/IRt3Vospvfv+iT08DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T16:40:26.264002Z","bundle_sha256":"28996a01360dc64c5f6134a01809f41d55bca136c7410be6eb0e9d861783a944"}}