{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2021:FDFGYQPNZOWKGEGOYWRFW2MGHC","short_pith_number":"pith:FDFGYQPN","schema_version":"1.0","canonical_sha256":"28ca6c41edcbaca310cec5a25b698638b19ebd4b477ee3e1287f10b98ea1e9f0","source":{"kind":"arxiv","id":"2106.16163","version":2},"attestation_state":"computed","paper":{"title":"The MultiBERTs: BERT Reproductions for Robustness Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alexander D'Amour, Dipanjan Das, Ellie Pavlick, Ian Tenney, Iulia Turc, Jacob Eisenstein, Jasmijn Bastings, Jason Wei, Naomi Saphra, Steve Yadlowsky, Tal Linzen, Thibault Sellam","submitted_at":"2021-06-30T15:56:44Z","abstract_excerpt":"Experiments with pre-trained models such as BERT are often based on a single checkpoint. While the conclusions drawn apply to the artifact tested in the experiment (i.e., the particular instance of the model), it is not always clear whether they hold for the more general procedure which includes the architecture, training data, initialization scheme, and loss function. Recent work has shown that repeating the pre-training process can lead to substantially different performance, suggesting that an alternate strategy is needed to make principled statements about procedures. To enable researchers"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2106.16163","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-06-30T15:56:44Z","cross_cats_sorted":[],"title_canon_sha256":"27ef2dcbd8f0bb7a6aead2b1062c0fe32f5fb70fb9e21edb3920931e13dc31dc","abstract_canon_sha256":"27369dfd24121815881021f2dfb368b1d6fbd5edfa26fb3985fa287549edd4f0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T04:07:04.447671Z","signature_b64":"eh1KCOT9GGbz8bkI0Kt6ZAyqjndnhyAGNITDWVnj3l+RxwYUM69HBlSDCdLfAeOBmfYdbNwv529t+oE84q/sCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"28ca6c41edcbaca310cec5a25b698638b19ebd4b477ee3e1287f10b98ea1e9f0","last_reissued_at":"2026-07-05T04:07:04.447183Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T04:07:04.447183Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The MultiBERTs: BERT Reproductions for Robustness Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alexander D'Amour, Dipanjan Das, Ellie Pavlick, Ian Tenney, Iulia Turc, Jacob Eisenstein, Jasmijn Bastings, Jason Wei, Naomi Saphra, Steve Yadlowsky, Tal Linzen, Thibault Sellam","submitted_at":"2021-06-30T15:56:44Z","abstract_excerpt":"Experiments with pre-trained models such as BERT are often based on a single checkpoint. While the conclusions drawn apply to the artifact tested in the experiment (i.e., the particular instance of the model), it is not always clear whether they hold for the more general procedure which includes the architecture, training data, initialization scheme, and loss function. Recent work has shown that repeating the pre-training process can lead to substantially different performance, suggesting that an alternate strategy is needed to make principled statements about procedures. To enable researchers"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2106.16163","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2106.16163/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2106.16163","created_at":"2026-07-05T04:07:04.447240+00:00"},{"alias_kind":"arxiv_version","alias_value":"2106.16163v2","created_at":"2026-07-05T04:07:04.447240+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2106.16163","created_at":"2026-07-05T04:07:04.447240+00:00"},{"alias_kind":"pith_short_12","alias_value":"FDFGYQPNZOWK","created_at":"2026-07-05T04:07:04.447240+00:00"},{"alias_kind":"pith_short_16","alias_value":"FDFGYQPNZOWKGEGO","created_at":"2026-07-05T04:07:04.447240+00:00"},{"alias_kind":"pith_short_8","alias_value":"FDFGYQPN","created_at":"2026-07-05T04:07:04.447240+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2606.15980","citing_title":"Do Activation Monitors Survive Model Updates? Benchmarking, Predicting, and Repairing Activation-Monitor Staleness","ref_index":30,"is_internal_anchor":false},{"citing_arxiv_id":"2304.01373","citing_title":"Pythia: A Suite for Analyzing Large Language Models Across Training and Scaling","ref_index":151,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC","json":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC.json","graph_json":"https://pith.science/api/pith-number/FDFGYQPNZOWKGEGOYWRFW2MGHC/graph.json","events_json":"https://pith.science/api/pith-number/FDFGYQPNZOWKGEGOYWRFW2MGHC/events.json","paper":"https://pith.science/paper/FDFGYQPN"},"agent_actions":{"view_html":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC","download_json":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC.json","view_paper":"https://pith.science/paper/FDFGYQPN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2106.16163&json=true","fetch_graph":"https://pith.science/api/pith-number/FDFGYQPNZOWKGEGOYWRFW2MGHC/graph.json","fetch_events":"https://pith.science/api/pith-number/FDFGYQPNZOWKGEGOYWRFW2MGHC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC/action/storage_attestation","attest_author":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC/action/author_attestation","sign_citation":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC/action/citation_signature","submit_replication":"https://pith.science/pith/FDFGYQPNZOWKGEGOYWRFW2MGHC/action/replication_record"}},"created_at":"2026-07-05T04:07:04.447240+00:00","updated_at":"2026-07-05T04:07:04.447240+00:00"}