{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:Z77G2SQJQRFD6RHINH6NKOYHMY","short_pith_number":"pith:Z77G2SQJ","schema_version":"1.0","canonical_sha256":"cffe6d4a09844a3f44e869fcd53b07662a8ac0bf2ce32b5584c790731f38b121","source":{"kind":"arxiv","id":"2605.30284","version":1},"attestation_state":"computed","paper":{"title":"ProjectionBench: Evaluating Scientific Hypothesis Generation in LLMs Under Progressive Information Disclosure","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"A. J. Lew (1), M. J. Buehler (1) ((1) Unreasonable Labs), Y. Cao (1)","submitted_at":"2026-05-28T17:38:19Z","abstract_excerpt":"Scientific discovery is an inherently creative and uncertain process, requiring reasoning beyond the recall of known knowledge. While many benchmarks have been proposed to evaluate large language model (LLM) performance on deep research tasks via multi-hop retrieval, their innovative reasoning abilities essential for true scientific discovery remain largely untested. We introduce a benchmark framework for evaluating model performance in scientific discovery and reasoning, building up from a raw problem to the classical null hypothesis test. In our framework, models initially receive only the t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.30284","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-28T17:38:19Z","cross_cats_sorted":[],"title_canon_sha256":"5ab3f62fb8d7a5da0a05c2e2ea1bee257247f4d3b9ce4f64aae4d615111516d7","abstract_canon_sha256":"483002713262caddaec12bc45950b2a0f7b12f4486e11fb88892c42e829c3d36"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T02:06:15.235821Z","signature_b64":"mEy45I+2wgxQANZ8JHS5MIoclEVVcVVlpxf96KNKWfQx03XfGQ0JNoLtTsiwQMzUpEhcjVe7tE/jCz7Lip22Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cffe6d4a09844a3f44e869fcd53b07662a8ac0bf2ce32b5584c790731f38b121","last_reissued_at":"2026-05-29T02:06:15.235401Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T02:06:15.235401Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ProjectionBench: Evaluating Scientific Hypothesis Generation in LLMs Under Progressive Information Disclosure","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"A. J. Lew (1), M. J. Buehler (1) ((1) Unreasonable Labs), Y. Cao (1)","submitted_at":"2026-05-28T17:38:19Z","abstract_excerpt":"Scientific discovery is an inherently creative and uncertain process, requiring reasoning beyond the recall of known knowledge. While many benchmarks have been proposed to evaluate large language model (LLM) performance on deep research tasks via multi-hop retrieval, their innovative reasoning abilities essential for true scientific discovery remain largely untested. We introduce a benchmark framework for evaluating model performance in scientific discovery and reasoning, building up from a raw problem to the classical null hypothesis test. In our framework, models initially receive only the t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30284","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30284/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.30284","created_at":"2026-05-29T02:06:15.235474+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.30284v1","created_at":"2026-05-29T02:06:15.235474+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30284","created_at":"2026-05-29T02:06:15.235474+00:00"},{"alias_kind":"pith_short_12","alias_value":"Z77G2SQJQRFD","created_at":"2026-05-29T02:06:15.235474+00:00"},{"alias_kind":"pith_short_16","alias_value":"Z77G2SQJQRFD6RHI","created_at":"2026-05-29T02:06:15.235474+00:00"},{"alias_kind":"pith_short_8","alias_value":"Z77G2SQJ","created_at":"2026-05-29T02:06:15.235474+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY","json":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY.json","graph_json":"https://pith.science/api/pith-number/Z77G2SQJQRFD6RHINH6NKOYHMY/graph.json","events_json":"https://pith.science/api/pith-number/Z77G2SQJQRFD6RHINH6NKOYHMY/events.json","paper":"https://pith.science/paper/Z77G2SQJ"},"agent_actions":{"view_html":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY","download_json":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY.json","view_paper":"https://pith.science/paper/Z77G2SQJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.30284&json=true","fetch_graph":"https://pith.science/api/pith-number/Z77G2SQJQRFD6RHINH6NKOYHMY/graph.json","fetch_events":"https://pith.science/api/pith-number/Z77G2SQJQRFD6RHINH6NKOYHMY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY/action/storage_attestation","attest_author":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY/action/author_attestation","sign_citation":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY/action/citation_signature","submit_replication":"https://pith.science/pith/Z77G2SQJQRFD6RHINH6NKOYHMY/action/replication_record"}},"created_at":"2026-05-29T02:06:15.235474+00:00","updated_at":"2026-05-29T02:06:15.235474+00:00"}