{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EH2WS2IUAXS2VJH66SCZTAG3VK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3c5422cc7bc648a6c1fb6891203168ab6b97661f79d4db3b32c822b17e9a4a97","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-01T11:32:02Z","title_canon_sha256":"f0588d4fea4f60f22ad3a5d811c3e558c59b76655a693dfb54e3901a6f7785d6"},"schema_version":"1.0","source":{"id":"2606.02100","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.02100","created_at":"2026-06-02T02:05:06Z"},{"alias_kind":"arxiv_version","alias_value":"2606.02100v1","created_at":"2026-06-02T02:05:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.02100","created_at":"2026-06-02T02:05:06Z"},{"alias_kind":"pith_short_12","alias_value":"EH2WS2IUAXS2","created_at":"2026-06-02T02:05:06Z"},{"alias_kind":"pith_short_16","alias_value":"EH2WS2IUAXS2VJH6","created_at":"2026-06-02T02:05:06Z"},{"alias_kind":"pith_short_8","alias_value":"EH2WS2IU","created_at":"2026-06-02T02:05:06Z"}],"graph_snapshots":[{"event_id":"sha256:5bb60ea11592e5aafb4877320bfda3475f4dfcbb81906b428a368ddef7ffad2c","target":"graph","created_at":"2026-06-02T02:05:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.02100/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Transformer models dominate modern NLP, but efficient, language-specific models remain scarce. In Portuguese, most focus on scale or accuracy, often neglecting training and deployment efficiency. In the present work, we introduce PortBERT, a family of RoBERTa-based language models for Portuguese, designed to balance performance and efficiency. Trained from scratch on over 450 GB of deduplicated and filtered mC4 and OSCAR23 from CulturaX using fairseq, PortBERT leverages byte-level BPE tokenization and stable pre-training routines across both GPU and TPU processors. We release two variants, Por","authors_text":"Armando B. Mendes, Henry He, Raphael Scheible-Schmitt","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-01T11:32:02Z","title":"PortBERT: Navigating the Depths of Portuguese Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.02100","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bdc24d8c2f75a13a9514a69658538abb25ca639030d3636c7073029c5c3d37cc","target":"record","created_at":"2026-06-02T02:05:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3c5422cc7bc648a6c1fb6891203168ab6b97661f79d4db3b32c822b17e9a4a97","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-01T11:32:02Z","title_canon_sha256":"f0588d4fea4f60f22ad3a5d811c3e558c59b76655a693dfb54e3901a6f7785d6"},"schema_version":"1.0","source":{"id":"2606.02100","kind":"arxiv","version":1}},"canonical_sha256":"21f569691405e5aaa4fef4859980dbaaa32cc73292a857616e509da4a3856fc2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"21f569691405e5aaa4fef4859980dbaaa32cc73292a857616e509da4a3856fc2","first_computed_at":"2026-06-02T02:05:06.503988Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:05:06.503988Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mm3+OCKrpuAABbObM+oZ73y5R+mgQQW8aYUCTJKALX8mP6Kp589hDYT9VVQldKaKyRE06ugRwsR6TX1m2S0SAQ==","signature_status":"signed_v1","signed_at":"2026-06-02T02:05:06.504353Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.02100","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bdc24d8c2f75a13a9514a69658538abb25ca639030d3636c7073029c5c3d37cc","sha256:5bb60ea11592e5aafb4877320bfda3475f4dfcbb81906b428a368ddef7ffad2c"],"state_sha256":"1cf495b1bddc9f00beb177ff2a6de9d8cbf2c56f8e2abcca8a045515185eba98"}