{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FACHC5474IHHX5SYZ5ORB6OSX2","short_pith_number":"pith:FACHC547","schema_version":"1.0","canonical_sha256":"280471779fe20e7bf658cf5d10f9d2bebc2a83fb0678222912550b94faf1ac27","source":{"kind":"arxiv","id":"2605.26730","version":1},"attestation_state":"computed","paper":{"title":"PRISM: A Multi-Dimensional Benchmark for Evaluating LLM Peer Reviewers","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Binh T. Nguyen, Duy A Nguyen, Khoa D. Doan, Kok-Seng Wong, Ngoc Phan Phuoc Loc, Nitesh V. Chawla, Thanh Nguyen, Thanh Tran Khanh, Toan Huynh La Viet, Tuan Anh Nguyen Pham, Wray Buntine","submitted_at":"2026-05-26T09:06:27Z","abstract_excerpt":"The rapid growth in submissions to machine learning venues has strained the scientific peer-review system and intensified interest in LLM-based automated peer reviewers. However, how good these systems are actually, especially compared to human reviewers at catching scientific gaps, remains poorly understood. In this work, we introduce PRISM (Peer Review Intelligence via Structured Multi-dimensional assessment), a benchmarking framework that evaluates review quality across four dimensions: Depth of Analysis, Novelty Assessment,Flaw Identification & Major Issues Prioritization, and Multi-dimens"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.26730","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-26T09:06:27Z","cross_cats_sorted":[],"title_canon_sha256":"12128d3401cd1a3910c9579f22153eccc96030cd3ff2af49794a573c51973d4e","abstract_canon_sha256":"20beb66dc855ff0965895b481dc7eeb646f20a76241153d0c3d0c98cf4255325"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:06:09.730896Z","signature_b64":"HcuQ4lz2dMalUHWJlz2/xOl3Bj1nF8widJQFz4sHbBcfuEvW50u0FXz1FyZtVAeTPBmOeBNC5MIyRwhsUYHSDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"280471779fe20e7bf658cf5d10f9d2bebc2a83fb0678222912550b94faf1ac27","last_reissued_at":"2026-05-27T01:06:09.730283Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:06:09.730283Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PRISM: A Multi-Dimensional Benchmark for Evaluating LLM Peer Reviewers","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Binh T. Nguyen, Duy A Nguyen, Khoa D. Doan, Kok-Seng Wong, Ngoc Phan Phuoc Loc, Nitesh V. Chawla, Thanh Nguyen, Thanh Tran Khanh, Toan Huynh La Viet, Tuan Anh Nguyen Pham, Wray Buntine","submitted_at":"2026-05-26T09:06:27Z","abstract_excerpt":"The rapid growth in submissions to machine learning venues has strained the scientific peer-review system and intensified interest in LLM-based automated peer reviewers. However, how good these systems are actually, especially compared to human reviewers at catching scientific gaps, remains poorly understood. In this work, we introduce PRISM (Peer Review Intelligence via Structured Multi-dimensional assessment), a benchmarking framework that evaluates review quality across four dimensions: Depth of Analysis, Novelty Assessment,Flaw Identification & Major Issues Prioritization, and Multi-dimens"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26730","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.26730/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.26730","created_at":"2026-05-27T01:06:09.730374+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.26730v1","created_at":"2026-05-27T01:06:09.730374+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26730","created_at":"2026-05-27T01:06:09.730374+00:00"},{"alias_kind":"pith_short_12","alias_value":"FACHC5474IHH","created_at":"2026-05-27T01:06:09.730374+00:00"},{"alias_kind":"pith_short_16","alias_value":"FACHC5474IHHX5SY","created_at":"2026-05-27T01:06:09.730374+00:00"},{"alias_kind":"pith_short_8","alias_value":"FACHC547","created_at":"2026-05-27T01:06:09.730374+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2","json":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2.json","graph_json":"https://pith.science/api/pith-number/FACHC5474IHHX5SYZ5ORB6OSX2/graph.json","events_json":"https://pith.science/api/pith-number/FACHC5474IHHX5SYZ5ORB6OSX2/events.json","paper":"https://pith.science/paper/FACHC547"},"agent_actions":{"view_html":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2","download_json":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2.json","view_paper":"https://pith.science/paper/FACHC547","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.26730&json=true","fetch_graph":"https://pith.science/api/pith-number/FACHC5474IHHX5SYZ5ORB6OSX2/graph.json","fetch_events":"https://pith.science/api/pith-number/FACHC5474IHHX5SYZ5ORB6OSX2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2/action/storage_attestation","attest_author":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2/action/author_attestation","sign_citation":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2/action/citation_signature","submit_replication":"https://pith.science/pith/FACHC5474IHHX5SYZ5ORB6OSX2/action/replication_record"}},"created_at":"2026-05-27T01:06:09.730374+00:00","updated_at":"2026-05-27T01:06:09.730374+00:00"}