{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:U7RVE3AUQEPREVVNNRTK5QKFGK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"43d9bc2322eb4186c4a223c1a780b89edee68b5bb103b026ffa31054e51298f0","cross_cats_sorted":["cs.CE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T14:46:34Z","title_canon_sha256":"fdeabf1fe4f03f0d34e956887d9e9eebb2c56eca3eb762a55267cba9dfecb2fd"},"schema_version":"1.0","source":{"id":"2605.15218","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15218","created_at":"2026-05-20T00:00:46Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15218v1","created_at":"2026-05-20T00:00:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15218","created_at":"2026-05-20T00:00:46Z"},{"alias_kind":"pith_short_12","alias_value":"U7RVE3AUQEPR","created_at":"2026-05-20T00:00:46Z"},{"alias_kind":"pith_short_16","alias_value":"U7RVE3AUQEPREVVN","created_at":"2026-05-20T00:00:46Z"},{"alias_kind":"pith_short_8","alias_value":"U7RVE3AU","created_at":"2026-05-20T00:00:46Z"}],"graph_snapshots":[{"event_id":"sha256:0e23885b2895ebbf9d487f0ddbb03e46e49de7836976f5cf6f8317640ffd30eb","target":"graph","created_at":"2026-05-20T00:00:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Model_only achieves the best completion rate (0.9267), task score (3.59/4), total score (9.16/10), and zero-intervention rate (0.84), outperforming rule_only (0.7733, 3.17/4, 7.03/10, 0.00) and no_recovery (0.6933, 2.74/4, 5.60/10, 0.00) with large effect sizes (Cliff's delta = 0.81-0.87)."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The benchmark uses deliberately simple geometries to isolate recovery-policy effects, and the observed performance differences will hold when the same recovery ladder is applied to more complex real-world geometries and loading conditions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"CAX-Agent is a three-layer agent harness for MAPDL automation whose model-driven recovery policy reaches 0.93 task completion and 0.84 zero-intervention rate on 50 simple structural benchmarks, outperforming rule-only and no-recovery baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A model-driven recovery policy inside a lightweight agent harness raises APDL automation completion rates above 92 percent."}],"snapshot_sha256":"ea9f9323b240dc106826de0880d5ac5132d398ffb7d51c7d820bdb64dbbf9a83"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7d67b0ff6b6a2fcfe339b225ed386ef4adf1ac65a679d6261dfe5765b0df22f8"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T22:41:58.372997Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T18:01:18.634046Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T17:50:44.232531Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.838325Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.15218/integrity.json","findings":[],"snapshot_sha256":"de625f5e90dda18b25d6e66bf361230bbb472354ed0c95bf509daad80425ada2","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models deployed for MAPDL finite-element simulation face practical reliability challenges: without structured execution control, tool encapsulation, and fault recovery, outputs may be inconsistent and task failures are common. The Agent Harness paradigm addresses this by inserting domain-specific orchestration middleware that manages tool lifecycles, workflow state, and recovery escalation. This paper presents the architecture of CAX-Agent, a lightweight agent harness purpose-built for MAPDL automation, and empirically evaluates one of its core components -- the recovery policy.","authors_text":"Chenying Lin, Haiyan Qiang, Liang Yu, Ran Wang, Yichen Hai, Yi He","cross_cats":["cs.CE"],"headline":"A model-driven recovery policy inside a lightweight agent harness raises APDL automation completion rates above 92 percent.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T14:46:34Z","title":"CAX-Agent: A Lightweight Agent Harness for Reliable APDL Automation"},"references":{"count":26,"internal_anchors":2,"resolved_work":26,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Attention is all you need,","work_id":"a479d910-ec22-4c4f-8745-0e478756ccba","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"BERT: Pre- training of deep bidirectional transformers for language understanding,","work_id":"281d14ff-34d2-42df-9297-1358c352bfa1","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Language models are few-shot learners,","work_id":"bac81291-4816-4ff3-ac72-60203570d359","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"ReAct: Synergizing reasoning and acting in language models","work_id":"dcede5c1-a91b-43d9-a097-8083603cb625","year":2023},{"cited_arxiv_id":"2604.11378","doi":"","is_internal_anchor":true,"ref_index":5,"title":"From Agent Loops to Structured Graphs:A Scheduler-Theoretic Framework for LLM Agent Execution","work_id":"6171cc48-d73f-46d7-8202-375ba39c6d1b","year":2026}],"snapshot_sha256":"71285e62d6b854f79167296532d6f9c9af1ffadec12626211f4cafd43ecf66c9"},"source":{"id":"2605.15218","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T17:35:23.801661Z","id":"3e475f8f-3a97-4c74-9554-ad605f8fbef5","model_set":{"reader":"grok-4.3"},"one_line_summary":"CAX-Agent is a three-layer agent harness for MAPDL automation whose model-driven recovery policy reaches 0.93 task completion and 0.84 zero-intervention rate on 50 simple structural benchmarks, outperforming rule-only and no-recovery baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A model-driven recovery policy inside a lightweight agent harness raises APDL automation completion rates above 92 percent.","strongest_claim":"Model_only achieves the best completion rate (0.9267), task score (3.59/4), total score (9.16/10), and zero-intervention rate (0.84), outperforming rule_only (0.7733, 3.17/4, 7.03/10, 0.00) and no_recovery (0.6933, 2.74/4, 5.60/10, 0.00) with large effect sizes (Cliff's delta = 0.81-0.87).","weakest_assumption":"The benchmark uses deliberately simple geometries to isolate recovery-policy effects, and the observed performance differences will hold when the same recovery ladder is applied to more complex real-world geometries and loading conditions."}},"verdict_id":"3e475f8f-3a97-4c74-9554-ad605f8fbef5"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:179fd080b982358ddfdb7420f89a3b23285a7bb0ec9cff23460ba250f07f3e12","target":"record","created_at":"2026-05-20T00:00:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"43d9bc2322eb4186c4a223c1a780b89edee68b5bb103b026ffa31054e51298f0","cross_cats_sorted":["cs.CE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T14:46:34Z","title_canon_sha256":"fdeabf1fe4f03f0d34e956887d9e9eebb2c56eca3eb762a55267cba9dfecb2fd"},"schema_version":"1.0","source":{"id":"2605.15218","kind":"arxiv","version":1}},"canonical_sha256":"a7e3526c14811f1256ad6c66aec14532b406bdd7bc55603454a15ac88f504cf4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a7e3526c14811f1256ad6c66aec14532b406bdd7bc55603454a15ac88f504cf4","first_computed_at":"2026-05-20T00:00:46.802905Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:46.802905Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UMCeQWK+XdXhGLPGrlzP5NGBlhcUbpcn/xlnkjhsfUNg7g8X1u+H/VglwQY6ku5WO0wZr/w6nd8dPIX8NKKWBQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:46.803842Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.15218","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:179fd080b982358ddfdb7420f89a3b23285a7bb0ec9cff23460ba250f07f3e12","sha256:0e23885b2895ebbf9d487f0ddbb03e46e49de7836976f5cf6f8317640ffd30eb"],"state_sha256":"b0e28bea2bd135ce7238a6244498305899f79d92ea5779b306bb9e3588ae7509"}