{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TVKWGMVXMRIZCMNWHGN5B2ROTH","merge_version":"pith-open-graph-merge-v1","event_count":4,"valid_event_count":4,"invalid_event_count":0,"equivocation_count":1,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4a0dce75c28c94a95e249986298416735901d9edcf3fca7ca7d5c116af20bceb","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-21T08:45:50Z","title_canon_sha256":"6ead5618986c381dd49dc5b5216cfef647f96e980396e9180ec31ba25fe439d3"},"schema_version":"1.0","source":{"id":"2605.22175","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22175","created_at":"2026-05-22T01:04:30Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22175v1","created_at":"2026-05-22T01:04:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22175","created_at":"2026-05-22T01:04:30Z"},{"alias_kind":"pith_short_12","alias_value":"TVKWGMVXMRIZ","created_at":"2026-05-22T01:04:30Z"},{"alias_kind":"pith_short_16","alias_value":"TVKWGMVXMRIZCMNW","created_at":"2026-05-22T01:04:30Z"},{"alias_kind":"pith_short_8","alias_value":"TVKWGMVX","created_at":"2026-05-22T01:04:30Z"}],"graph_snapshots":[{"event_id":"sha256:34fc249cb536f1a7c6f49223610cb42d091455cc322522a7fa38ca68e7d21587","target":"graph","created_at":"2026-05-22T01:04:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.22175/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Evaluating software engineering capabilities has become a core component of modern large language models (LLMs); however, the key bottleneck hindering further scaling lies not in the scarcity of high-quality solutions, but in the lack of high-quality test suites. Test suites are indispensable both for synthesizing program repair trajectories and for providing precise feedback signals in reinforcement learning. Unfortunately, due to the high cost and difficulty of annotation, high-quality test suites have long been hard to obtain, while those automatically generated by LLMs tend to be superfici","authors_text":"Jinbo Wang, Kai Zhang, Mengdi Zhang, Yao Du, Yufeng Wang, Yuxuan Sun, Yuze Zhao, Zhenya Huang, Zhiyuan Ma","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-21T08:45:50Z","title":"SWE-Mutation: Can LLMs Generate Reliable Test Suites in Software Engineering?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22175","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a01a426181685eef97bf20d4307f5d9d01a45757fcde28505e3e64c1392d4481","target":"record","created_at":"2026-05-22T01:04:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4a0dce75c28c94a95e249986298416735901d9edcf3fca7ca7d5c116af20bceb","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-21T08:45:50Z","title_canon_sha256":"6ead5618986c381dd49dc5b5216cfef647f96e980396e9180ec31ba25fe439d3"},"schema_version":"1.0","source":{"id":"2605.22175","kind":"arxiv","version":1}},"canonical_sha256":"9d556332b764519131b6399bd0ea2e99f6287569a4100e2c3085285767ff0def","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9d556332b764519131b6399bd0ea2e99f6287569a4100e2c3085285767ff0def","first_computed_at":"2026-05-22T01:04:30.271057Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:30.271057Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lnqtNt6YJ4SWEW9BMiCANlesqV9EQywqqQQ5EcXcHeW5xkJ9HJXny6JSq6BcrcOwf/CLj7hLoR5hS9Fn0oL/AA==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:30.271820Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.22175","source_kind":"arxiv","source_version":1}}},"equivocations":[{"signer_id":"pith.science","event_type":"integrity_finding","target":"integrity","event_ids":["sha256:fb9ff3a48e7e7221160e2f1b0b91e15fff71befd485dbe4028d540d36d03121e","sha256:ff7d6ee9b4a381d758389b91049524fe1d6707221eeb462fed5ba3bc0e48b17f"]}],"invalid_events":[],"applied_event_ids":["sha256:a01a426181685eef97bf20d4307f5d9d01a45757fcde28505e3e64c1392d4481","sha256:34fc249cb536f1a7c6f49223610cb42d091455cc322522a7fa38ca68e7d21587"],"state_sha256":"78cfe5de8b4a57afb4a296eb0abe9526d5e8dabebea0492672169b93e8dccb43"}