{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:UARD3O5LATPWSRGGFAQSHZDF6T","short_pith_number":"pith:UARD3O5L","schema_version":"1.0","canonical_sha256":"a0223dbbab04df6944c6282123e465f4c78fd65c15dc9c4ea48f2a4faac70d18","source":{"kind":"arxiv","id":"2606.01789","version":1},"attestation_state":"computed","paper":{"title":"Consistency evaluation of benchmarks used for causal discovery","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chen Wang, Chihui Chen, Lina Yao, Yuzhe Zhang","submitted_at":"2026-06-01T07:09:06Z","abstract_excerpt":"In graphical causal model, causal discovery aims to construct a causal graph based on numerical data and domain knowledge in plain text. However, the evaluation of causal discovery methods remains a challenge in the area as the progress of domain researches often makes benchmark causal graphs contain mis-aligned knowledge. This problem especially affects the evaluation of large language model (LLM) based causal discovery methods as they are sensitive to the new discoveries in the literature. This work is the first to systematically study the quality of benchmark causal graphs. Specifically, we"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.01789","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-01T07:09:06Z","cross_cats_sorted":[],"title_canon_sha256":"d6c2a6edd1780c6b6b469e4d2ee5a89a74a5544437a367ab14b6e9d2f8c92bdb","abstract_canon_sha256":"2e8a396dc2cf87a6ae4aad576526f7cf13f5eec93e7ef8752c2af6cd6da2ebe7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:56.894188Z","signature_b64":"hXdL3AvP2DM5dBA0eYWwbZklbj4YAP0RYG35Tk7Kq/vTWJc2zNwSnkgq3bbGKq3Fe+tLF1YvHinD2D54dc3kDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a0223dbbab04df6944c6282123e465f4c78fd65c15dc9c4ea48f2a4faac70d18","last_reissued_at":"2026-06-02T02:04:56.893755Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:56.893755Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Consistency evaluation of benchmarks used for causal discovery","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chen Wang, Chihui Chen, Lina Yao, Yuzhe Zhang","submitted_at":"2026-06-01T07:09:06Z","abstract_excerpt":"In graphical causal model, causal discovery aims to construct a causal graph based on numerical data and domain knowledge in plain text. However, the evaluation of causal discovery methods remains a challenge in the area as the progress of domain researches often makes benchmark causal graphs contain mis-aligned knowledge. This problem especially affects the evaluation of large language model (LLM) based causal discovery methods as they are sensitive to the new discoveries in the literature. This work is the first to systematically study the quality of benchmark causal graphs. Specifically, we"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.01789","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.01789/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.01789","created_at":"2026-06-02T02:04:56.893821+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.01789v1","created_at":"2026-06-02T02:04:56.893821+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.01789","created_at":"2026-06-02T02:04:56.893821+00:00"},{"alias_kind":"pith_short_12","alias_value":"UARD3O5LATPW","created_at":"2026-06-02T02:04:56.893821+00:00"},{"alias_kind":"pith_short_16","alias_value":"UARD3O5LATPWSRGG","created_at":"2026-06-02T02:04:56.893821+00:00"},{"alias_kind":"pith_short_8","alias_value":"UARD3O5L","created_at":"2026-06-02T02:04:56.893821+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T","json":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T.json","graph_json":"https://pith.science/api/pith-number/UARD3O5LATPWSRGGFAQSHZDF6T/graph.json","events_json":"https://pith.science/api/pith-number/UARD3O5LATPWSRGGFAQSHZDF6T/events.json","paper":"https://pith.science/paper/UARD3O5L"},"agent_actions":{"view_html":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T","download_json":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T.json","view_paper":"https://pith.science/paper/UARD3O5L","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.01789&json=true","fetch_graph":"https://pith.science/api/pith-number/UARD3O5LATPWSRGGFAQSHZDF6T/graph.json","fetch_events":"https://pith.science/api/pith-number/UARD3O5LATPWSRGGFAQSHZDF6T/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T/action/storage_attestation","attest_author":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T/action/author_attestation","sign_citation":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T/action/citation_signature","submit_replication":"https://pith.science/pith/UARD3O5LATPWSRGGFAQSHZDF6T/action/replication_record"}},"created_at":"2026-06-02T02:04:56.893821+00:00","updated_at":"2026-06-02T02:04:56.893821+00:00"}