{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WOKFU2YA4W5244ULW5ADOJXD6V","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d5ae84bb94af6bfa0d1f21ed38ae070ccc8903e311024ad5e1b88c9ec0d91bbd","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-21T16:49:11Z","title_canon_sha256":"41da58f1e93571f91bcb2071e591c669ddaa6f1ff535e36b5a25c91ce5606168"},"schema_version":"1.0","source":{"id":"2604.19667","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.19667","created_at":"2026-05-27T02:05:20Z"},{"alias_kind":"arxiv_version","alias_value":"2604.19667v2","created_at":"2026-05-27T02:05:20Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.19667","created_at":"2026-05-27T02:05:20Z"},{"alias_kind":"pith_short_12","alias_value":"WOKFU2YA4W52","created_at":"2026-05-27T02:05:20Z"},{"alias_kind":"pith_short_16","alias_value":"WOKFU2YA4W5244UL","created_at":"2026-05-27T02:05:20Z"},{"alias_kind":"pith_short_8","alias_value":"WOKFU2YA","created_at":"2026-05-27T02:05:20Z"}],"graph_snapshots":[{"event_id":"sha256:863df4564d30b3dbafe8f6af48a697945885388f0763fb43cb94d02b5e461870","target":"graph","created_at":"2026-05-27T02:05:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"while state-of-the-art language models can often capture high-level intent, they struggle to generate correct, stable, and executable workflows, especially under complex or changing requirements. Although our agentic framework yields up to 5.34% resolve rate gains, the remaining real-world gap positions Chat2Workflow as a foundation for advancing industrial-grade automation."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The collected real-world business workflows are representative of practical industrial needs and that generated workflows can be transformed and directly deployed to platforms such as Dify and Coze without loss of intended functionality."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Chat2Workflow benchmark shows that state-of-the-art LLMs often grasp high-level intent for visual workflow generation but fail to produce correct, stable, executable outputs, with an agentic framework delivering only modest 5.34% gains."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models capture high-level intent but struggle to produce correct, stable, executable visual workflows from natural language."}],"snapshot_sha256":"7acd2e09704cec868af8167a6b26ad3e54138650ecabc4737fd0187b4ce6b3f5"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-21T16:34:13.890497Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T02:40:24.162240Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.19667/integrity.json","findings":[],"snapshot_sha256":"cf2050198f41793818f5842ce926ec2321132feb3753d3ce0c19815d4ab8cd60","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"At present, executable visual workflows have emerged as a mainstream paradigm in real-world industrial deployments, offering strong reliability and controllability. However, in current practice, such workflows are almost entirely constructed through manual engineering: developers must carefully design workflows, write prompts for each step, and repeatedly revise the logic as requirements evolve -- making development costly, time-consuming, and error-prone. To study whether large language models can automate this multi-round interaction process, we introduce Chat2Workflow, a benchmark for gener","authors_text":"Buqiang Xu, Guozhou Zheng, Ningyu Zhang, Shuofei Qiao, Yijun Wang, Yi Zhong, Zifei Shan","cross_cats":["cs.AI","cs.CV","cs.LG","cs.MA"],"headline":"Large language models capture high-level intent but struggle to produce correct, stable, executable visual workflows from natural language.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-21T16:49:11Z","title":"Chat2Workflow: A Benchmark for Generating Executable Visual Workflows with Natural Language"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.19667","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T02:51:23.631783Z","id":"89c9cdd7-3fdf-496b-a282-e18367fee6ca","model_set":{"reader":"grok-4.3"},"one_line_summary":"Chat2Workflow benchmark shows that state-of-the-art LLMs often grasp high-level intent for visual workflow generation but fail to produce correct, stable, executable outputs, with an agentic framework delivering only modest 5.34% gains.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models capture high-level intent but struggle to produce correct, stable, executable visual workflows from natural language.","strongest_claim":"while state-of-the-art language models can often capture high-level intent, they struggle to generate correct, stable, and executable workflows, especially under complex or changing requirements. Although our agentic framework yields up to 5.34% resolve rate gains, the remaining real-world gap positions Chat2Workflow as a foundation for advancing industrial-grade automation.","weakest_assumption":"The collected real-world business workflows are representative of practical industrial needs and that generated workflows can be transformed and directly deployed to platforms such as Dify and Coze without loss of intended functionality."}},"verdict_id":"89c9cdd7-3fdf-496b-a282-e18367fee6ca"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a5f8925885b746c544b9603669c30cb8677ee9cfc2e09ea96feeb0742822569c","target":"record","created_at":"2026-05-27T02:05:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d5ae84bb94af6bfa0d1f21ed38ae070ccc8903e311024ad5e1b88c9ec0d91bbd","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","cs.MA"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-21T16:49:11Z","title_canon_sha256":"41da58f1e93571f91bcb2071e591c669ddaa6f1ff535e36b5a25c91ce5606168"},"schema_version":"1.0","source":{"id":"2604.19667","kind":"arxiv","version":2}},"canonical_sha256":"b3945a6b00e5bbae728bb7403726e3f555c4fad04f353c5f4059668581b9b447","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b3945a6b00e5bbae728bb7403726e3f555c4fad04f353c5f4059668581b9b447","first_computed_at":"2026-05-27T02:05:20.012379Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T02:05:20.012379Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/gX0sQPPHDEo0BsnmEiuZOyFmy3rK6D0SpgMXV7NjPZI/sObEiOilCEiv9j2nhgOcu+AMbX2uJScD68271pvDg==","signature_status":"signed_v1","signed_at":"2026-05-27T02:05:20.012880Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.19667","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a5f8925885b746c544b9603669c30cb8677ee9cfc2e09ea96feeb0742822569c","sha256:863df4564d30b3dbafe8f6af48a697945885388f0763fb43cb94d02b5e461870"],"state_sha256":"5db6e1e4f251ba03707562f2769194f422a09271551a5554de2356a8206a64e4"}