{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:PUSHK26RYUT65IRF2HJ47GK7W2","short_pith_number":"pith:PUSHK26R","canonical_record":{"source":{"id":"2303.17564","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-03-30T17:30:36Z","cross_cats_sorted":["cs.AI","cs.CL","q-fin.GN"],"title_canon_sha256":"ce000b4019446a1232badc49fdffe0f3fa25a4751e0d195e87cf1b1f46bd0aff","abstract_canon_sha256":"3c618fff827861fdc6b1d501a9e1e1d2a66362df35cf96dc55522e3df3e43035"},"schema_version":"1.0"},"canonical_sha256":"7d24756bd1c527eea225d1d3cf995fb6a5eaea0be5362d70ab1712618bfb7c58","source":{"kind":"arxiv","id":"2303.17564","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2303.17564","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"arxiv_version","alias_value":"2303.17564v3","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2303.17564","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"pith_short_12","alias_value":"PUSHK26RYUT6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PUSHK26RYUT65IRF","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PUSHK26R","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:PUSHK26RYUT65IRF2HJ47GK7W2","target":"record","payload":{"canonical_record":{"source":{"id":"2303.17564","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-03-30T17:30:36Z","cross_cats_sorted":["cs.AI","cs.CL","q-fin.GN"],"title_canon_sha256":"ce000b4019446a1232badc49fdffe0f3fa25a4751e0d195e87cf1b1f46bd0aff","abstract_canon_sha256":"3c618fff827861fdc6b1d501a9e1e1d2a66362df35cf96dc55522e3df3e43035"},"schema_version":"1.0"},"canonical_sha256":"7d24756bd1c527eea225d1d3cf995fb6a5eaea0be5362d70ab1712618bfb7c58","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:47:53.394418Z","signature_b64":"xIVEqz99PRJVIkww6RxvXbQKyYUeKtNSnM5iyu+o5xqBDo9JqiA+RiKV3m4TP8grTzasVg+hc1QhFm41KF/4AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7d24756bd1c527eea225d1d3cf995fb6a5eaea0be5362d70ab1712618bfb7c58","last_reissued_at":"2026-05-18T03:47:53.393747Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:47:53.393747Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2303.17564","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:47:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"s6IhuEAkNkawgC+vMiYwMOTCvbC0oximDsXF+yxFdqWmX8Yys1UhWTKJHkxFe83t+tL1iyJckov5C1dUuNNKDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T12:00:39.225003Z"},"content_sha256":"7210791d8a5bb68e09c4fe10d238cc434e2c3da81746cf304a9b00e7bc30a149","schema_version":"1.0","event_id":"sha256:7210791d8a5bb68e09c4fe10d238cc434e2c3da81746cf304a9b00e7bc30a149"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:PUSHK26RYUT65IRF2HJ47GK7W2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"BloombergGPT: A Large Language Model for Finance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance.","cross_cats":["cs.AI","cs.CL","q-fin.GN"],"primary_cat":"cs.LG","authors_text":"David Rosenberg, Gideon Mann, Mark Dredze, Ozan Irsoy, Prabhanjan Kambadur, Sebastian Gehrmann, Shijie Wu, Steven Lu, Vadim Dabravolski","submitted_at":"2023-03-30T17:30:36Z","abstract_excerpt":"The use of NLP in the realm of financial technology is broad and complex, with applications ranging from sentiment analysis and named entity recognition to question answering. Large Language Models (LLMs) have been shown to be effective on a variety of tasks; however, no LLM specialized for the financial domain has been reported in literature. In this work, we present BloombergGPT, a 50 billion parameter language model that is trained on a wide range of financial data. We construct a 363 billion token dataset based on Bloomberg's extensive data sources, perhaps the largest domain-specific data"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our mixed dataset training leads to a model that outperforms existing models on financial tasks by significant margins without sacrificing performance on general LLM benchmarks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the internal benchmarks and chosen financial data sources accurately reflect real-world usage and that the performance gains are not due to dataset-specific artifacts or evaluation choices.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"BloombergGPT is a 50B parameter LLM trained on a 708B token mixed financial and general dataset that outperforms prior models on financial benchmarks while preserving general LLM performance.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a89833c5bc818881b7ff6f8cb2096d8697ff892e4f0cd9653527f9be64826198"},"source":{"id":"2303.17564","kind":"arxiv","version":3},"verdict":{"id":"fb5079f8-d635-4cc5-8715-ec8dd5937eec","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T23:14:46.985302Z","strongest_claim":"Our mixed dataset training leads to a model that outperforms existing models on financial tasks by significant margins without sacrificing performance on general LLM benchmarks.","one_line_summary":"BloombergGPT is a 50B parameter LLM trained on a 708B token mixed financial and general dataset that outperforms prior models on financial benchmarks while preserving general LLM performance.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the internal benchmarks and chosen financial data sources accurately reflect real-world usage and that the performance gains are not due to dataset-specific artifacts or evaluation choices.","pith_extraction_headline":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance."},"references":{"count":140,"sample":[{"doi":"","year":1908,"title":"FinBERT: Financial Sentiment Analysis with Pre-trained Language Models","work_id":"3dd01f6f-6c0f-4a47-a6dd-6ca15bc4e219","ref_index":1,"cited_arxiv_id":"1908.10063","is_internal_anchor":true},{"doi":"","year":2022,"title":"PLATO - XL : Exploring the large-scale pre-training of dialogue generation","work_id":"c1c6d296-00b7-46ca-b0d9-8274a884fa42","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/d19-1371","year":2019,"title":"S ci BERT : A pretrained language model for scientific text","work_id":"f4322284-a0f8-4f17-b855-c0eccacb4545","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"On the dangers of stochastic parrots: Can language models be too big? In Proceedings of the 2021 ACM conference on fairness, accountability, and transparency, pages 610--623","work_id":"717f4c67-3986-48a0-b9f6-50e30b658cbf","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2009,"title":"The fifth PASCAL recognizing textual entailment challenge","work_id":"b6e103a5-a597-4905-b033-4a41b167e7ac","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":140,"snapshot_sha256":"0af8286cfd92006a17cd11e70fb5a030bf5babcb880c2ba0c7e9ded791457812","internal_anchors":32},"formal_canon":{"evidence_count":2,"snapshot_sha256":"22a43aa6f5f72f4089a51719a6fac9a632a9ea4598046a111477a4a1d49cda8f"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"fb5079f8-d635-4cc5-8715-ec8dd5937eec"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:47:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QnI/9I0A6Hc5XccqDtz37OnfkGd3312aoH8f4Ja2v45UTAkzkW9W1U3pvCO471wl40pG5D6uYzNLA0VGqnZ9AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T12:00:39.226046Z"},"content_sha256":"194d7aab41deb111d3551684993c53aec7e336f3c01602530243c27eacddb5e2","schema_version":"1.0","event_id":"sha256:194d7aab41deb111d3551684993c53aec7e336f3c01602530243c27eacddb5e2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PUSHK26RYUT65IRF2HJ47GK7W2/bundle.json","state_url":"https://pith.science/pith/PUSHK26RYUT65IRF2HJ47GK7W2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PUSHK26RYUT65IRF2HJ47GK7W2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T12:00:39Z","links":{"resolver":"https://pith.science/pith/PUSHK26RYUT65IRF2HJ47GK7W2","bundle":"https://pith.science/pith/PUSHK26RYUT65IRF2HJ47GK7W2/bundle.json","state":"https://pith.science/pith/PUSHK26RYUT65IRF2HJ47GK7W2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PUSHK26RYUT65IRF2HJ47GK7W2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:PUSHK26RYUT65IRF2HJ47GK7W2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3c618fff827861fdc6b1d501a9e1e1d2a66362df35cf96dc55522e3df3e43035","cross_cats_sorted":["cs.AI","cs.CL","q-fin.GN"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-03-30T17:30:36Z","title_canon_sha256":"ce000b4019446a1232badc49fdffe0f3fa25a4751e0d195e87cf1b1f46bd0aff"},"schema_version":"1.0","source":{"id":"2303.17564","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2303.17564","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"arxiv_version","alias_value":"2303.17564v3","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2303.17564","created_at":"2026-05-18T03:47:53Z"},{"alias_kind":"pith_short_12","alias_value":"PUSHK26RYUT6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"PUSHK26RYUT65IRF","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"PUSHK26R","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:194d7aab41deb111d3551684993c53aec7e336f3c01602530243c27eacddb5e2","target":"graph","created_at":"2026-05-18T03:47:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our mixed dataset training leads to a model that outperforms existing models on financial tasks by significant margins without sacrificing performance on general LLM benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the internal benchmarks and chosen financial data sources accurately reflect real-world usage and that the performance gains are not due to dataset-specific artifacts or evaluation choices."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"BloombergGPT is a 50B parameter LLM trained on a 708B token mixed financial and general dataset that outperforms prior models on financial benchmarks while preserving general LLM performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance."}],"snapshot_sha256":"a89833c5bc818881b7ff6f8cb2096d8697ff892e4f0cd9653527f9be64826198"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"22a43aa6f5f72f4089a51719a6fac9a632a9ea4598046a111477a4a1d49cda8f"},"paper":{"abstract_excerpt":"The use of NLP in the realm of financial technology is broad and complex, with applications ranging from sentiment analysis and named entity recognition to question answering. Large Language Models (LLMs) have been shown to be effective on a variety of tasks; however, no LLM specialized for the financial domain has been reported in literature. In this work, we present BloombergGPT, a 50 billion parameter language model that is trained on a wide range of financial data. We construct a 363 billion token dataset based on Bloomberg's extensive data sources, perhaps the largest domain-specific data","authors_text":"David Rosenberg, Gideon Mann, Mark Dredze, Ozan Irsoy, Prabhanjan Kambadur, Sebastian Gehrmann, Shijie Wu, Steven Lu, Vadim Dabravolski","cross_cats":["cs.AI","cs.CL","q-fin.GN"],"headline":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-03-30T17:30:36Z","title":"BloombergGPT: A Large Language Model for Finance"},"references":{"count":140,"internal_anchors":32,"resolved_work":140,"sample":[{"cited_arxiv_id":"1908.10063","doi":"","is_internal_anchor":true,"ref_index":1,"title":"FinBERT: Financial Sentiment Analysis with Pre-trained Language Models","work_id":"3dd01f6f-6c0f-4a47-a6dd-6ca15bc4e219","year":1908},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"PLATO - XL : Exploring the large-scale pre-training of dialogue generation","work_id":"c1c6d296-00b7-46ca-b0d9-8274a884fa42","year":2022},{"cited_arxiv_id":"","doi":"10.18653/v1/d19-1371","is_internal_anchor":false,"ref_index":3,"title":"S ci BERT : A pretrained language model for scientific text","work_id":"f4322284-a0f8-4f17-b855-c0eccacb4545","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"On the dangers of stochastic parrots: Can language models be too big? In Proceedings of the 2021 ACM conference on fairness, accountability, and transparency, pages 610--623","work_id":"717f4c67-3986-48a0-b9f6-50e30b658cbf","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"The fifth PASCAL recognizing textual entailment challenge","work_id":"b6e103a5-a597-4905-b033-4a41b167e7ac","year":2009}],"snapshot_sha256":"0af8286cfd92006a17cd11e70fb5a030bf5babcb880c2ba0c7e9ded791457812"},"source":{"id":"2303.17564","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-13T23:14:46.985302Z","id":"fb5079f8-d635-4cc5-8715-ec8dd5937eec","model_set":{"reader":"grok-4.3"},"one_line_summary":"BloombergGPT is a 50B parameter LLM trained on a 708B token mixed financial and general dataset that outperforms prior models on financial benchmarks while preserving general LLM performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"BloombergGPT, a 50 billion parameter model trained on financial plus general data, outperforms prior models on financial tasks while preserving general LLM performance.","strongest_claim":"Our mixed dataset training leads to a model that outperforms existing models on financial tasks by significant margins without sacrificing performance on general LLM benchmarks.","weakest_assumption":"That the internal benchmarks and chosen financial data sources accurately reflect real-world usage and that the performance gains are not due to dataset-specific artifacts or evaluation choices."}},"verdict_id":"fb5079f8-d635-4cc5-8715-ec8dd5937eec"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7210791d8a5bb68e09c4fe10d238cc434e2c3da81746cf304a9b00e7bc30a149","target":"record","created_at":"2026-05-18T03:47:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3c618fff827861fdc6b1d501a9e1e1d2a66362df35cf96dc55522e3df3e43035","cross_cats_sorted":["cs.AI","cs.CL","q-fin.GN"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-03-30T17:30:36Z","title_canon_sha256":"ce000b4019446a1232badc49fdffe0f3fa25a4751e0d195e87cf1b1f46bd0aff"},"schema_version":"1.0","source":{"id":"2303.17564","kind":"arxiv","version":3}},"canonical_sha256":"7d24756bd1c527eea225d1d3cf995fb6a5eaea0be5362d70ab1712618bfb7c58","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7d24756bd1c527eea225d1d3cf995fb6a5eaea0be5362d70ab1712618bfb7c58","first_computed_at":"2026-05-18T03:47:53.393747Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:47:53.393747Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"xIVEqz99PRJVIkww6RxvXbQKyYUeKtNSnM5iyu+o5xqBDo9JqiA+RiKV3m4TP8grTzasVg+hc1QhFm41KF/4AQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:47:53.394418Z","signed_message":"canonical_sha256_bytes"},"source_id":"2303.17564","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7210791d8a5bb68e09c4fe10d238cc434e2c3da81746cf304a9b00e7bc30a149","sha256:194d7aab41deb111d3551684993c53aec7e336f3c01602530243c27eacddb5e2"],"state_sha256":"5938975ead5647958d0ee2ee3e751d146492354e042c6b8b2fa021ff4b25e6e9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4Rlp9aK5CrvQxe9FfNXZstX2NO5nFlBXANFZ2GjTRIl8TjhdoR+jwetaw+C4/7sCqTXDhkhpqR2p1PLSXfnCAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T12:00:39.229286Z","bundle_sha256":"a77de1fe7d2281000bc2db100ac4f4130c0c4e35792be70c4f4a9cf4b6e29104"}}