{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:OUD2UJ4DO7DH7RTUXP5P6HVJNL","short_pith_number":"pith:OUD2UJ4D","schema_version":"1.0","canonical_sha256":"7507aa278377c67fc674bbfaff1ea96ace133513325eec2d44dd00c3a83c7bd7","source":{"kind":"arxiv","id":"2510.15614","version":3},"attestation_state":"computed","paper":{"title":"HypoSpace: A Diagnostic Benchmark for Set-Valued Hypothesis Generation under Underdetermination and Sublinear Coverage Bounds","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anirudh Goyal, Beibei Lin, Dianbo Liu, Hongyu He, Qiran Zou, Tingting Chen, Yew-Soon Ong, Zifeng Yuan","submitted_at":"2025-10-17T13:00:32Z","abstract_excerpt":"Many scientific problems are underdetermined: multiple distinct hypotheses are equally consistent with the same observations. In such settings, effective inference requires not only producing valid explanations, but also systematically exploring and covering the admissible hypothesis set. We introduce HypoSpace, a benchmark that treats large language models (LLMs) as samplers over finite hypothesis spaces and evaluates them on three metrics: Validity, Uniqueness, and Recovery. HypoSpace spans three structured domains (causal graph inference, gravity-constrained 3D voxel reconstruction, and Boo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.15614","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T13:00:32Z","cross_cats_sorted":[],"title_canon_sha256":"a0938d4d04253feaf53dc1ca972aa35b17bb159263f6ee2513ff01b0288d0440","abstract_canon_sha256":"601e96f120b14c2f47451bb3df64a8299e8b02003bd4a3acdfc8629a585bbc79"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:02:23.736648Z","signature_b64":"CNRa/8h8zJNA03rpn5oCSonyVcXGvJgw/UUg+qTXm8UTt0zlEvrd/OQA8H85oK33Y73DoPOKHyDekgEsdK0cAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7507aa278377c67fc674bbfaff1ea96ace133513325eec2d44dd00c3a83c7bd7","last_reissued_at":"2026-06-01T01:02:23.735576Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:02:23.735576Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"HypoSpace: A Diagnostic Benchmark for Set-Valued Hypothesis Generation under Underdetermination and Sublinear Coverage Bounds","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Anirudh Goyal, Beibei Lin, Dianbo Liu, Hongyu He, Qiran Zou, Tingting Chen, Yew-Soon Ong, Zifeng Yuan","submitted_at":"2025-10-17T13:00:32Z","abstract_excerpt":"Many scientific problems are underdetermined: multiple distinct hypotheses are equally consistent with the same observations. In such settings, effective inference requires not only producing valid explanations, but also systematically exploring and covering the admissible hypothesis set. We introduce HypoSpace, a benchmark that treats large language models (LLMs) as samplers over finite hypothesis spaces and evaluates them on three metrics: Validity, Uniqueness, and Recovery. HypoSpace spans three structured domains (causal graph inference, gravity-constrained 3D voxel reconstruction, and Boo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.15614","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.15614/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.15614","created_at":"2026-06-01T01:02:23.735742+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.15614v3","created_at":"2026-06-01T01:02:23.735742+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.15614","created_at":"2026-06-01T01:02:23.735742+00:00"},{"alias_kind":"pith_short_12","alias_value":"OUD2UJ4DO7DH","created_at":"2026-06-01T01:02:23.735742+00:00"},{"alias_kind":"pith_short_16","alias_value":"OUD2UJ4DO7DH7RTU","created_at":"2026-06-01T01:02:23.735742+00:00"},{"alias_kind":"pith_short_8","alias_value":"OUD2UJ4D","created_at":"2026-06-01T01:02:23.735742+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.23602","citing_title":"GlowGS: Generative Semantic Feature Learning for 3D Gaussian Splatting in Nighttime Glow Scenes","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11258","citing_title":"Unlocking LLM Creativity in Science through Analogical Reasoning","ref_index":9,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL","json":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL.json","graph_json":"https://pith.science/api/pith-number/OUD2UJ4DO7DH7RTUXP5P6HVJNL/graph.json","events_json":"https://pith.science/api/pith-number/OUD2UJ4DO7DH7RTUXP5P6HVJNL/events.json","paper":"https://pith.science/paper/OUD2UJ4D"},"agent_actions":{"view_html":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL","download_json":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL.json","view_paper":"https://pith.science/paper/OUD2UJ4D","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.15614&json=true","fetch_graph":"https://pith.science/api/pith-number/OUD2UJ4DO7DH7RTUXP5P6HVJNL/graph.json","fetch_events":"https://pith.science/api/pith-number/OUD2UJ4DO7DH7RTUXP5P6HVJNL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL/action/storage_attestation","attest_author":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL/action/author_attestation","sign_citation":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL/action/citation_signature","submit_replication":"https://pith.science/pith/OUD2UJ4DO7DH7RTUXP5P6HVJNL/action/replication_record"}},"created_at":"2026-06-01T01:02:23.735742+00:00","updated_at":"2026-06-01T01:02:23.735742+00:00"}