{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:56JFTSWGPGAHVUGF44HXLKF23G","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7ef04bb111e7f7237d546562af6cf8b3dbe8ebd1fd0466e3710fd1c0d5b028c5","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2025-11-04T09:27:01Z","title_canon_sha256":"4085f73b915aba0f60ba9942997e7559cecb089cd87cb1e0832b135f84a77c12"},"schema_version":"1.0","source":{"id":"2511.02399","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.02399","created_at":"2026-06-08T01:03:50Z"},{"alias_kind":"arxiv_version","alias_value":"2511.02399v3","created_at":"2026-06-08T01:03:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.02399","created_at":"2026-06-08T01:03:50Z"},{"alias_kind":"pith_short_12","alias_value":"56JFTSWGPGAH","created_at":"2026-06-08T01:03:50Z"},{"alias_kind":"pith_short_16","alias_value":"56JFTSWGPGAHVUGF","created_at":"2026-06-08T01:03:50Z"},{"alias_kind":"pith_short_8","alias_value":"56JFTSWG","created_at":"2026-06-08T01:03:50Z"}],"graph_snapshots":[{"event_id":"sha256:fb58e6a5b3c0ba27fbb0f9660ce8206998678c11c80b0702d3a8e93b82aea386","target":"graph","created_at":"2026-06-08T01:03:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We evaluate EvoDev on challenging Android development tasks and show that it outperforms the best-performing baseline, Claude Code, by a substantial margin of 56.8%, while improving single-agent performance by 16.0%-76.6% across different base LLMs."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the reported performance gains on Android tasks stem primarily from the Feature Map dependency modeling and context propagation rather than from unstated details in agent prompting, evaluation metrics, or task selection."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EvoDev introduces an iterative feature-driven framework with a DAG-based Feature Map for context propagation that improves LLM agent performance on end-to-end software development tasks by 56.8% over the best baseline."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"EvoDev's Feature Map models feature dependencies and propagates context to let LLM agents outperform linear baselines by 56.8 percent on Android tasks."}],"snapshot_sha256":"9f2d3229e996938c770e87fb1d7739b4cadd243bcdd0872bc7306c96af7ad29e"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"61d5cc8fff534c4a3eaba70a409e31cee6c17213a0f3249d7215c652d87f0cb4"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2511.02399/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advances in large language model agents offer the promise of automating end-to-end software development from natural language requirements. However, existing approaches largely adopt linear, waterfall-style pipelines, which oversimplify the iterative nature of real-world development and struggle with complex, large-scale projects. To address these limitations, we propose EvoDev, an iterative software development framework inspired by feature-driven development. EvoDev decomposes user requirements into a set of user-valued features and constructs a Feature Map, a directed acyclic graph t","authors_text":"Chen Xu, Chong Wang, Junwei Liu, Kaseng Wong, Tong Bai, Weitong Chen, Xin Peng, Yiling Lou","cross_cats":["cs.AI"],"headline":"EvoDev's Feature Map models feature dependencies and propagates context to let LLM agents outperform linear baselines by 56.8 percent on Android tasks.","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2025-11-04T09:27:01Z","title":"Towards Iterative End-to-End Software Development: A Feature-Driven Multi-Agent Framework"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.02399","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-18T01:37:20.673222Z","id":"e389b97b-c063-45de-9947-7603bcf6e65b","model_set":{"reader":"grok-4.3"},"one_line_summary":"EvoDev introduces an iterative feature-driven framework with a DAG-based Feature Map for context propagation that improves LLM agent performance on end-to-end software development tasks by 56.8% over the best baseline.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"EvoDev's Feature Map models feature dependencies and propagates context to let LLM agents outperform linear baselines by 56.8 percent on Android tasks.","strongest_claim":"We evaluate EvoDev on challenging Android development tasks and show that it outperforms the best-performing baseline, Claude Code, by a substantial margin of 56.8%, while improving single-agent performance by 16.0%-76.6% across different base LLMs.","weakest_assumption":"That the reported performance gains on Android tasks stem primarily from the Feature Map dependency modeling and context propagation rather than from unstated details in agent prompting, evaluation metrics, or task selection."}},"verdict_id":"e389b97b-c063-45de-9947-7603bcf6e65b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:913f8ca42caa73c1bd881536f7423483023352193aa4339edb33e605b06b53dd","target":"record","created_at":"2026-06-08T01:03:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7ef04bb111e7f7237d546562af6cf8b3dbe8ebd1fd0466e3710fd1c0d5b028c5","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2025-11-04T09:27:01Z","title_canon_sha256":"4085f73b915aba0f60ba9942997e7559cecb089cd87cb1e0832b135f84a77c12"},"schema_version":"1.0","source":{"id":"2511.02399","kind":"arxiv","version":3}},"canonical_sha256":"ef9259cac679807ad0c5e70f75a8bad9a6e51da699e04f26aaceab94bab4db3e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ef9259cac679807ad0c5e70f75a8bad9a6e51da699e04f26aaceab94bab4db3e","first_computed_at":"2026-06-08T01:03:50.708128Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:03:50.708128Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TyVtHwTYB7ysPj8o7K3YG+a14YY/GnrMQZmgHOXKp/eP87snUA9PFfIrdkQncVjZWCDPj1g4b9k7LUo0q8XdBw==","signature_status":"signed_v1","signed_at":"2026-06-08T01:03:50.709093Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.02399","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:913f8ca42caa73c1bd881536f7423483023352193aa4339edb33e605b06b53dd","sha256:fb58e6a5b3c0ba27fbb0f9660ce8206998678c11c80b0702d3a8e93b82aea386"],"state_sha256":"4b4b633be1e5e0f6aa6ab7da3b8614f53706838f2f28586e94305843841223f0"}