{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:Y4Q44JX3OQHZLG5HME4SB7MPIQ","short_pith_number":"pith:Y4Q44JX3","schema_version":"1.0","canonical_sha256":"c721ce26fb740f959ba7613920fd8f440b40ef3b9066680b13fa2e947e36df2d","source":{"kind":"arxiv","id":"2606.28538","version":1},"attestation_state":"computed","paper":{"title":"Legal Domain Adaptation of Modern BERT Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dominik Stammbach, Peter Henderson","submitted_at":"2026-06-26T18:44:11Z","abstract_excerpt":"We investigate domain adaptation of modern BERT models in the legal domain. We further pre-train ModernBERT on all US court opinions using the masked language modeling objective. Although ModernBERT has been trained on roughly 500x more data than original BERT, we still find that this model benefits from further pre-training and domain adaptation in the legal domain: we report significant improvements compared to vanilla ModernBERT on all datasets connected to US court opinions. We find gains similar to those reported in early work on domain adaptation of BERT-like models. However, from scratc"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.28538","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-26T18:44:11Z","cross_cats_sorted":[],"title_canon_sha256":"1e41a29a6b18fbc6c828266c808110c44bf9e4c388e4a1193a7c7886f47f554c","abstract_canon_sha256":"67b03867af64544bd509afc2129d7da68472dc964f3b403a8cf01c846731ed82"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T00:15:17.445442Z","signature_b64":"p3Au+Ui4fJO+ieifqERiuzZF/cmnw4CyhXJ5rhGxfgIStz8aW5rQ22KiMjcTskam0DSRm2DAo0Av4F07HAUJAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c721ce26fb740f959ba7613920fd8f440b40ef3b9066680b13fa2e947e36df2d","last_reissued_at":"2026-06-30T00:15:17.445055Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T00:15:17.445055Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Legal Domain Adaptation of Modern BERT Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dominik Stammbach, Peter Henderson","submitted_at":"2026-06-26T18:44:11Z","abstract_excerpt":"We investigate domain adaptation of modern BERT models in the legal domain. We further pre-train ModernBERT on all US court opinions using the masked language modeling objective. Although ModernBERT has been trained on roughly 500x more data than original BERT, we still find that this model benefits from further pre-training and domain adaptation in the legal domain: we report significant improvements compared to vanilla ModernBERT on all datasets connected to US court opinions. We find gains similar to those reported in early work on domain adaptation of BERT-like models. However, from scratc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28538","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28538/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.28538","created_at":"2026-06-30T00:15:17.445109+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.28538v1","created_at":"2026-06-30T00:15:17.445109+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28538","created_at":"2026-06-30T00:15:17.445109+00:00"},{"alias_kind":"pith_short_12","alias_value":"Y4Q44JX3OQHZ","created_at":"2026-06-30T00:15:17.445109+00:00"},{"alias_kind":"pith_short_16","alias_value":"Y4Q44JX3OQHZLG5H","created_at":"2026-06-30T00:15:17.445109+00:00"},{"alias_kind":"pith_short_8","alias_value":"Y4Q44JX3","created_at":"2026-06-30T00:15:17.445109+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ","json":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ.json","graph_json":"https://pith.science/api/pith-number/Y4Q44JX3OQHZLG5HME4SB7MPIQ/graph.json","events_json":"https://pith.science/api/pith-number/Y4Q44JX3OQHZLG5HME4SB7MPIQ/events.json","paper":"https://pith.science/paper/Y4Q44JX3"},"agent_actions":{"view_html":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ","download_json":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ.json","view_paper":"https://pith.science/paper/Y4Q44JX3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.28538&json=true","fetch_graph":"https://pith.science/api/pith-number/Y4Q44JX3OQHZLG5HME4SB7MPIQ/graph.json","fetch_events":"https://pith.science/api/pith-number/Y4Q44JX3OQHZLG5HME4SB7MPIQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ/action/storage_attestation","attest_author":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ/action/author_attestation","sign_citation":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ/action/citation_signature","submit_replication":"https://pith.science/pith/Y4Q44JX3OQHZLG5HME4SB7MPIQ/action/replication_record"}},"created_at":"2026-06-30T00:15:17.445109+00:00","updated_at":"2026-06-30T00:15:17.445109+00:00"}