{"schema":"https://pith.science/schemas/pith-integrity/v1.json","pith_number":"2604.18292","arxiv_id":"2604.18292","integrity":{"available":true,"endpoint":"/pith/2604.18292/integrity.json","summary":{"critical":0,"advisory":1,"informational":0,"by_detector":{"doi_compliance":{"total":1,"critical":0,"advisory":1,"informational":0}}},"clean":false,"detectors_run":[{"name":"doi_compliance","version":"1.0.0","status":"completed","ran_at":"2026-05-20T04:13:25.779293Z","findings_count":1}],"findings":[{"detector":"doi_compliance","finding_type":"recoverable_identifier","severity":"advisory","verdict_class":"incontrovertible","note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-naacl.65.URLhttps://aclanthology.org/2025.findings-naacl.65/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","detected_doi":"10.18653/v1/2025.findings-naacl.65.URLhttps://aclanthology.org/2025.findings-naacl.65/","detected_arxiv_id":null,"ref_index":61,"audited_at":"2026-05-20T04:13:25.779293Z"}],"snapshot_sha256":"4abfb7e6f243a0923654f2a757557c8d810c155b9b98627e175d764988559411"},"events":[{"event_id":4737,"event_type":"pith.integrity.v1","payload_sha256":"94989e7d68bf40397d3e3645e59d209baa57007f22e7335efb5fc03c771e1928","signature_b64":"Fj6foEuYarwPgpRSkuBZCmOSfv8kxCusflLokgg/imoNX0TVED3hOA8sSI3Nd+ufwAHjvehimGBtefZWc/HcBw==","signing_key_id":"pith-v1-2026-05","created_at":"2026-05-20T04:17:42.007156+00:00","payload":{"note":"DOI in the printed bibliography is fragmented by whitespace or line breaks. A longer candidate (10.18653/v1/2025.findings-naacl.65.URLhttps://aclanthology.org/2025.findings-naacl.65/) was visible in the surrounding text but could not be confirmed against doi.org as printed.","snippet":"Jiarui Lu, Thomas Holleis, Yizhe Zhang, Bernhard Aumayer, Feng Nan, Haoping Bai, Shuang Ma, Shen Ma, Mengyu Li, Guoli Yin, Zirui Wang, and Ruoming Pang. ToolSandbox: A stateful, conversational, interactive evaluation benchmark for LLM tool ","arxiv_id":"2604.18292","detector":"doi_compliance","evidence":{"ref_index":61,"verdict_class":"incontrovertible","resolved_title":null,"printed_excerpt":"Jiarui Lu, Thomas Holleis, Yizhe Zhang, Bernhard Aumayer, Feng Nan, Haoping Bai, Shuang Ma, Shen Ma, Mengyu Li, Guoli Yin, Zirui Wang, and Ruoming Pang. ToolSandbox: A stateful, conversational, interactive evaluation benchmark for LLM tool ","reconstructed_doi":"10.18653/v1/2025.findings-naacl.65.URLhttps://aclanthology.org/2025.findings-naacl.65/"},"severity":"advisory","ref_index":61,"audited_at":"2026-05-20T04:13:25.779293Z","event_type":"pith.integrity.v1","detected_doi":"10.18653/v1/2025.findings-naacl.65.URLhttps://aclanthology.org/2025.findings-naacl.65/","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"recoverable_identifier","evidence_hash":"7633d5d51affe5944908d1b9da05c28f2d65e300d6d1e09a2f563959ab2db201","paper_version":1,"verdict_class":"incontrovertible","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null}}],"endpoint_self":"/pith/2604.18292/integrity.json","protocol_url":"https://pith.science/pith-integrity-protocol"}