{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BTSJHMU23PZI7JZPOEO55I2HHR","short_pith_number":"pith:BTSJHMU2","schema_version":"1.0","canonical_sha256":"0ce493b29adbf28fa72f711ddea3473c5834132ce75f9fda7290d7b6c3150b1c","source":{"kind":"arxiv","id":"2605.27062","version":1},"attestation_state":"computed","paper":{"title":"FalAR: A Large-scale Speaker-Annotated European Portuguese Speech Corpus of Parliamentary Sessions","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Alberto Abad, Ben Peters, Carlos Carvalho, Catarina Botelho, Francisco Teixeira, Isabel Trancoso, Mariana Juli\\~ao, Rub\\'en Solera-Ure\\~na, S\\'ergio Paulo, Thomas Rolland","submitted_at":"2026-05-26T14:14:37Z","abstract_excerpt":"State-of-the-art performance for Automatic Speech Recognition (ASR) largely depends on the availability of large-scale labeled corpora. This creates a demand for increased data collection efforts, particularly for under-represented languages and dialectal varieties. Due to having considerably fewer speakers (around 11 million), European Portuguese (EP) is overshadowed by Brazilian Portuguese (BP) (around 200 million speakers) in currently available large-scale speech data resources, resulting in under-performing speech-based systems for EP users. To address this gap, and following similar data"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.27062","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-26T14:14:37Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"a67bac37af77d06650b56cb6e55cf1aeda8cd9b7db65d79f2209eba4a2f4a790","abstract_canon_sha256":"eef6cf4cbb6c501089f60ab53c3b4d7c7fe108638cc69cdb8dec9effe2354ad2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:06:26.487107Z","signature_b64":"lSEb9we+xokYzUY9cD5IxNgUZMYkvfFKFSdty2Z9L9lD2ma3xPZdJu5VeUO1NfUbUhjpmvavhyiyku1LUgYzCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0ce493b29adbf28fa72f711ddea3473c5834132ce75f9fda7290d7b6c3150b1c","last_reissued_at":"2026-05-27T01:06:26.486677Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:06:26.486677Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"FalAR: A Large-scale Speaker-Annotated European Portuguese Speech Corpus of Parliamentary Sessions","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Alberto Abad, Ben Peters, Carlos Carvalho, Catarina Botelho, Francisco Teixeira, Isabel Trancoso, Mariana Juli\\~ao, Rub\\'en Solera-Ure\\~na, S\\'ergio Paulo, Thomas Rolland","submitted_at":"2026-05-26T14:14:37Z","abstract_excerpt":"State-of-the-art performance for Automatic Speech Recognition (ASR) largely depends on the availability of large-scale labeled corpora. This creates a demand for increased data collection efforts, particularly for under-represented languages and dialectal varieties. Due to having considerably fewer speakers (around 11 million), European Portuguese (EP) is overshadowed by Brazilian Portuguese (BP) (around 200 million speakers) in currently available large-scale speech data resources, resulting in under-performing speech-based systems for EP users. To address this gap, and following similar data"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27062","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27062/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.27062","created_at":"2026-05-27T01:06:26.486738+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.27062v1","created_at":"2026-05-27T01:06:26.486738+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27062","created_at":"2026-05-27T01:06:26.486738+00:00"},{"alias_kind":"pith_short_12","alias_value":"BTSJHMU23PZI","created_at":"2026-05-27T01:06:26.486738+00:00"},{"alias_kind":"pith_short_16","alias_value":"BTSJHMU23PZI7JZP","created_at":"2026-05-27T01:06:26.486738+00:00"},{"alias_kind":"pith_short_8","alias_value":"BTSJHMU2","created_at":"2026-05-27T01:06:26.486738+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR","json":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR.json","graph_json":"https://pith.science/api/pith-number/BTSJHMU23PZI7JZPOEO55I2HHR/graph.json","events_json":"https://pith.science/api/pith-number/BTSJHMU23PZI7JZPOEO55I2HHR/events.json","paper":"https://pith.science/paper/BTSJHMU2"},"agent_actions":{"view_html":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR","download_json":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR.json","view_paper":"https://pith.science/paper/BTSJHMU2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.27062&json=true","fetch_graph":"https://pith.science/api/pith-number/BTSJHMU23PZI7JZPOEO55I2HHR/graph.json","fetch_events":"https://pith.science/api/pith-number/BTSJHMU23PZI7JZPOEO55I2HHR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR/action/storage_attestation","attest_author":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR/action/author_attestation","sign_citation":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR/action/citation_signature","submit_replication":"https://pith.science/pith/BTSJHMU23PZI7JZPOEO55I2HHR/action/replication_record"}},"created_at":"2026-05-27T01:06:26.486738+00:00","updated_at":"2026-05-27T01:06:26.486738+00:00"}