{"schema":"https://pith.science/schemas/pith-integrity/v1.json","pith_number":"2604.27263","arxiv_id":"2604.27263","integrity":{"available":true,"endpoint":"/pith/2604.27263/integrity.json","summary":{"critical":0,"advisory":10,"informational":0,"by_detector":{"doi_compliance":{"total":10,"critical":0,"advisory":10,"informational":0}}},"clean":false,"detectors_run":[{"name":"ai_meta_artifact","version":"1.0.0","status":"completed","ran_at":"2026-05-20T22:41:04.857710Z","findings_count":0},{"name":"doi_compliance","version":"1.0.0","status":"completed","ran_at":"2026-05-19T19:25:52.347849Z","findings_count":10}],"findings":[{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1162/tacl_a_00448.url:https://doi.org/10.1162/tacl_a_00448(visitedon) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.1162/tacl_a_00448.url:https://doi.org/10.1162/tacl_a_00448(visitedon","detected_arxiv_id":null,"ref_index":6,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/P18-1007.URL:https://aclanthology.org/P18-1007/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/P18-1007.URL:https://aclanthology.org/P18-1007/(visited","detected_arxiv_id":null,"ref_index":18,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2021.acl-long.243.URL:https://aclanthology.org/2021.acl-long.243/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/2021.acl-long.243.URL:https://aclanthology.org/2021.acl-long.243/(visited","detected_arxiv_id":null,"ref_index":29,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2020.findings-emnlp.414.URL:https://aclanthology.org/2020.findings-emnlp.414/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/2020.findings-emnlp.414.URL:https://aclanthology.org/2020.findings-emnlp.414/(visited","detected_arxiv_id":null,"ref_index":2,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1016/j.eswa.2026.131492.url:https) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.1016/j.eswa.2026.131492.url:https","detected_arxiv_id":null,"ref_index":9,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.48550/arXiv.2512.15586.arXiv:2512.15586[cs) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.48550/arXiv.2512.15586.arXiv:2512.15586[cs","detected_arxiv_id":null,"ref_index":23,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/P16-1162.URL:https://aclanthology.org/P16-1162/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/P16-1162.URL:https://aclanthology.org/P16-1162/(visited","detected_arxiv_id":null,"ref_index":31,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-acl.593.URL:https) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/2025.findings-acl.593.URL:https","detected_arxiv_id":null,"ref_index":5,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/D19-1141.URL:https://aclanthology.org/D19-1141/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/D19-1141.URL:https://aclanthology.org/D19-1141/","detected_arxiv_id":null,"ref_index":11,"audited_at":"2026-05-19T19:25:52.347849Z"},{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2020.acl-main.170.URL:https://aclanthology.org/2020.acl-main.170/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/2020.acl-main.170.URL:https://aclanthology.org/2020.acl-main.170/(visited","detected_arxiv_id":null,"ref_index":26,"audited_at":"2026-05-19T19:25:52.347849Z"}],"snapshot_sha256":"3b3395fdb9006dcb83d4338f5bd4f565a182db68a20aa9beeb1ac55cfe190ae1"},"events":[{"event_id":2722,"event_type":"pith.integrity.v1","payload_sha256":"ebbc47e1b1e9a12d9569cedcff54830cc22faddb113baa021f162e06b2c4303a","signature_b64":"HqgOuRcaj/OrpCfP8GFS4V8nfEcW/Hb53HKJDqyCLnAawZtg+X8SLNhHpgJQpeDU+lg2z/ETWw96+sG0RstvBg==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.456300+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/P16-1162.URL:https://aclanthology.org/P16-1162/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Rico Sennrich, Barry Haddow, and Alexandra Birch. “Neural Machine Translation of Rare Words with Subword Units”. In:Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). ACL 2016. E","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":31,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Rico Sennrich, Barry Haddow, and Alexandra Birch. “Neural Machine Translation of Rare Words with Subword Units”. In:Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). ACL 2016. E","reconstructed_doi":"10.18653/v1/P16-1162.URL:https://aclanthology.org/P16-1162/(visited"},"severity":"advisory","ref_index":31,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/P16-1162.URL:https://aclanthology.org/P16-1162/(visited","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"8ffe8f9a625585be4dfc252028b6afecf376f58f9f18cdfddd8284314cbcf2ac","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2721,"event_type":"pith.integrity.v1","payload_sha256":"eff1f89b6878b8e6b6824c23f5bc803853b3afb4ce4933b123da62c8a832a168","signature_b64":"YA7MKEBiwJ//Ia0WKpXFj7YPtNDwWBhphadqvpE8xVem2t0SHOW0XnOjxvqk3vPdYFYFThCRVv+hLuVQ5qJ3DQ==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.455064+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2021.acl-long.243.URL:https://aclanthology.org/2021.acl-long.243/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Phillip Rust, Jonas Pfeiffer, Ivan Vuli ´c, Sebastian Ruder, and Iryna Gurevych. “How Good is Your Tokenizer? On the Monolingual Performance of Multilingual Language Models”. In: Proceedings of the 59th Annual Meeting of the Association for","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":29,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Phillip Rust, Jonas Pfeiffer, Ivan Vuli ´c, Sebastian Ruder, and Iryna Gurevych. “How Good is Your Tokenizer? On the Monolingual Performance of Multilingual Language Models”. In: Proceedings of the 59th Annual Meeting of the Association for","reconstructed_doi":"10.18653/v1/2021.acl-long.243.URL:https://aclanthology.org/2021.acl-long.243/(visited"},"severity":"advisory","ref_index":29,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2021.acl-long.243.URL:https://aclanthology.org/2021.acl-long.243/(visited","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"536619505ecf6697c0d7c68162c060ee13c6ad8f6ef5527cb5895c0bea167fa4","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2720,"event_type":"pith.integrity.v1","payload_sha256":"2e8651218fb09f49bd212c3fe96b90bb451833cadf899cd6a3c67576110c5797","signature_b64":"MWpO5K/eNWlVD/PO+b0ECph7GVXBnNZw1w8M1gug0cB42UOLFgS+x8w5uGUlU5zmJ3KOvTqyio5kuNL4Kyh8Cg==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.453980+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2020.acl-main.170.URL:https://aclanthology.org/2020.acl-main.170/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Ivan Provilkov, Dmitrii Emelianenko, and Elena V oita. “BPE-Dropout: Simple and Effective Subword Regularization”. In:Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. ACL 2020. Ed. by Dan Jurafsky, Jo","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":26,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Ivan Provilkov, Dmitrii Emelianenko, and Elena V oita. “BPE-Dropout: Simple and Effective Subword Regularization”. In:Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. ACL 2020. Ed. by Dan Jurafsky, Jo","reconstructed_doi":"10.18653/v1/2020.acl-main.170.URL:https://aclanthology.org/2020.acl-main.170/(visited"},"severity":"advisory","ref_index":26,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2020.acl-main.170.URL:https://aclanthology.org/2020.acl-main.170/(visited","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"cd28466803c456a9f6939cad120dbc7bf8eb3ecf7a1466150783772cf5caec1e","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2719,"event_type":"pith.integrity.v1","payload_sha256":"f7a3eb7503b75da4a1832541f42284c65eb4ea5eeae54046a838970c9e96c55b","signature_b64":"MMlRIj/8txp/KYqvkK3j93BL/yms7cNvJO1gAlfGLkp8Oc8VBOET+ajAdUqBBLZhszYDw6/n9bFi6FcERWPWDA==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.452995+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.48550/arXiv.2512.15586.arXiv:2512.15586[cs) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Benjamin Minixhofer, Tyler Murray, Tomasz Limisiewicz, Anna Korhonen, Luke Zettle- moyer, Noah A. Smith, Edoardo M. Ponti, Luca Soldaini, and Valentin Hofmann.Bolmo: Byteifying the Next Generation of Language Models. Dec. 17, 2025.DOI:10.48","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":23,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Benjamin Minixhofer, Tyler Murray, Tomasz Limisiewicz, Anna Korhonen, Luke Zettle- moyer, Noah A. Smith, Edoardo M. Ponti, Luca Soldaini, and Valentin Hofmann.Bolmo: Byteifying the Next Generation of Language Models. Dec. 17, 2025.DOI:10.48","reconstructed_doi":"10.48550/arXiv.2512.15586.arXiv:2512.15586[cs"},"severity":"advisory","ref_index":23,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.48550/arXiv.2512.15586.arXiv:2512.15586[cs","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"a582473f1d2ed65fb094ffd97f0bd1ba2fb7b53961495009332bf212ababdea9","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2718,"event_type":"pith.integrity.v1","payload_sha256":"db5d1d517834c936594f932868051f9554a93005e68cb62ee17fcaee12354691","signature_b64":"2RnbOZEKnsbTl6V1akaLrkkv4VcPf/PSyCN6UyjTJ6hs9QqKT5BnJk/l1Tt7wYUVuGW81FD0ex1nNNfZP4D2Ag==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.451967+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/P18-1007.URL:https://aclanthology.org/P18-1007/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Taku Kudo. “Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates”. In:Proceedings of the 56th Annual Meeting of the Asso- ciation for Computational Linguistics (Volume 1: Long Papers). ACL 201","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":18,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Taku Kudo. “Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates”. In:Proceedings of the 56th Annual Meeting of the Asso- ciation for Computational Linguistics (Volume 1: Long Papers). ACL 201","reconstructed_doi":"10.18653/v1/P18-1007.URL:https://aclanthology.org/P18-1007/(visited"},"severity":"advisory","ref_index":18,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/P18-1007.URL:https://aclanthology.org/P18-1007/(visited","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"97492c6144fb16c4e66805df53230772ea1f36dc0d74ad19a53fb41214c6206e","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2717,"event_type":"pith.integrity.v1","payload_sha256":"8d29b24dec536d56e2c4f41dd78aab90ace6c4ad4db5b9518ae7900913543af3","signature_b64":"yWthXZ8G5ZJ/YsmYGGrmmBK9NzOgdT9TiXfnPgdaWUlwuqFJW4TZxQ5m8t03CXP3Azs/d54aqDocFcqI5W7zCg==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.451001+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/D19-1141.URL:https://aclanthology.org/D19-1141/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Matthias Gall ´e. “Investigating the Effectiveness of BPE: The Power of Shorter Sequences”. In:Proceedings of the 2019 Conference on Empirical Methods in Natural Language Process- ing and the 9th International Joint Conference on Natural La","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":11,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Matthias Gall ´e. “Investigating the Effectiveness of BPE: The Power of Shorter Sequences”. In:Proceedings of the 2019 Conference on Empirical Methods in Natural Language Process- ing and the 9th International Joint Conference on Natural La","reconstructed_doi":"10.18653/v1/D19-1141.URL:https://aclanthology.org/D19-1141/"},"severity":"advisory","ref_index":11,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/D19-1141.URL:https://aclanthology.org/D19-1141/","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"84e30595605e522eafc58fca4eeb1a78306a33c3da11cb69e8d0f31cad329a2f","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2716,"event_type":"pith.integrity.v1","payload_sha256":"3ebef424fe1b7302546a55a9d8a4026ecec2a7f3138e4ebf3b00d5c12da0c269","signature_b64":"Y0EDL6PWcCWSWaWjZ23PH0lEoaAGdBmzGtOSZ4jLntffKD7Fjan7vF49xYdHmj0O2FFVvgKQi4M41Z0WopjbCQ==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.449788+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1016/j.eswa.2026.131492.url:https) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"D ´avid Dr ˇz´ık and Jozef Kapusta. “The importance of morphology-aware subword tokeniza- tion for NLP tasks in Slovak language modeling”. In:Expert Systems with Applications312 (May 25, 2026), p. 131492.ISSN: 0957-4174.DOI:10.1016/j.eswa.2","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":9,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"D ´avid Dr ˇz´ık and Jozef Kapusta. “The importance of morphology-aware subword tokeniza- tion for NLP tasks in Slovak language modeling”. In:Expert Systems with Applications312 (May 25, 2026), p. 131492.ISSN: 0957-4174.DOI:10.1016/j.eswa.2","reconstructed_doi":"10.1016/j.eswa.2026.131492.url:https"},"severity":"advisory","ref_index":9,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.1016/j.eswa.2026.131492.url:https","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"b9ea0389c00e6cc924dde0fe7cddabd335467bd569a7277db974b37ed946fe04","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2715,"event_type":"pith.integrity.v1","payload_sha256":"c17edb264b2a4c34c82ffe0885e493113bc96a758ba01ffe08dfdf73d9d9ca46","signature_b64":"/9rSSblV1X0iZsYcF8o6KDTQP02Z39DnP6RkIrWaE/6/ktMD2lPUtUP4/yDBF6+W6yF3z174FYQcJAKN4Q+nBg==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.448452+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.1162/tacl_a_00448.url:https://doi.org/10.1162/tacl_a_00448(visitedon) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Jonathan H. Clark, Dan Garrette, Iulia Turc, and John Wieting. “Canine: Pre-training an Effi- cient Tokenization-Free Encoder for Language Representation”. In:Transactions of the Asso- ciation for Computational Linguistics10 (Jan. 31, 2022)","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":6,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Jonathan H. Clark, Dan Garrette, Iulia Turc, and John Wieting. “Canine: Pre-training an Effi- cient Tokenization-Free Encoder for Language Representation”. In:Transactions of the Asso- ciation for Computational Linguistics10 (Jan. 31, 2022)","reconstructed_doi":"10.1162/tacl_a_00448.url:https://doi.org/10.1162/tacl_a_00448(visitedon"},"severity":"advisory","ref_index":6,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.1162/tacl_a_00448.url:https://doi.org/10.1162/tacl_a_00448(visitedon","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"9225708755a8bd10105ddb102e041ae2b0a4728217bfddaf591f0df7c077ff60","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2714,"event_type":"pith.integrity.v1","payload_sha256":"fdbaa854d56bcce2c22a93d116c993112230d943038c02f7b57461af484bf1e7","signature_b64":"P4eK/0Pmzxe3hTuh3P3sCx3wkwxQzn6gJv94C0aWz40BFLgC0o/LyIMxutQextmto0An2/xB0Q6hn1tsGM0uCw==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.447092+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-acl.593.URL:https) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Cristiano Ciaccio, Marta Sartor, Alessio Miaschi, and Felice Dell’Orletta. “Beyond the Spelling Miracle: Investigating Substring Awareness in Character-Blind Language Models”. In:Findings of the Association for Computational Linguistics: AC","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":5,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Cristiano Ciaccio, Marta Sartor, Alessio Miaschi, and Felice Dell’Orletta. “Beyond the Spelling Miracle: Investigating Substring Awareness in Character-Blind Language Models”. In:Findings of the Association for Computational Linguistics: AC","reconstructed_doi":"10.18653/v1/2025.findings-acl.593.URL:https"},"severity":"advisory","ref_index":5,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2025.findings-acl.593.URL:https","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"517f83a1364ab59c57d25a0b1760bc6815711e226f1dba55161966cad063dc2b","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}},{"event_id":2713,"event_type":"pith.integrity.v1","payload_sha256":"aa9dc8a39568123eddf3c42c13c16a9f09c8ee2eb22e2dc0f90039cadf624c2b","signature_b64":"e5mAg13Wy2WWT3dbZUenKchfMCrrAr4Ttx2Fx4m/CKYOPkWLYfMx9NXmLfS3K1iaLATUfiZ2BwK5JL5f4LNjDw==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-19T19:27:19.445843+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2020.findings-emnlp.414.URL:https://aclanthology.org/2020.findings-emnlp.414/(visited) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Kaj Bostrom and Greg Durrett. “Byte Pair Encoding is Suboptimal for Language Model Pretraining”. In:Findings of the Association for Computational Linguistics: EMNLP 2020. Findings 2020. Ed. by Trevor Cohn, Yulan He, and Yang Liu. Online: As","arxiv_id":"2604.27263","detector":"doi_compliance","evidence":{"ref_index":2,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Kaj Bostrom and Greg Durrett. “Byte Pair Encoding is Suboptimal for Language Model Pretraining”. In:Findings of the Association for Computational Linguistics: EMNLP 2020. Findings 2020. Ed. by Trevor Cohn, Yulan He, and Yang Liu. Online: As","reconstructed_doi":"10.18653/v1/2020.findings-emnlp.414.URL:https://aclanthology.org/2020.findings-emnlp.414/(visited"},"severity":"advisory","ref_index":2,"audited_at":"2026-05-19T19:25:52.347849Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2020.findings-emnlp.414.URL:https://aclanthology.org/2020.findings-emnlp.414/(visited","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"a57c738f22de52bfc8e90c9289a6a0c175e1e552a55e6efd1c1885e019030d3e","paper_version":2,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}}],"endpoint_self":"/pith/2604.27263/integrity.json","protocol_url":"https://pith.science/pith-integrity-protocol"}