{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:OSF2LBR6UVW7G6RW2AEWJMBRW2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"879c669ccb5e12b791b7e1f288076db8a9077cee4e50f43f719820c10567736f","cross_cats_sorted":["cs.LG","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-21T14:45:02Z","title_canon_sha256":"0ede07bc048f8f649b3ecc2d174cfbbf85d2f958589977e5da8109a599397f38"},"schema_version":"1.0","source":{"id":"2605.22564","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22564","created_at":"2026-05-22T01:04:57Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22564v1","created_at":"2026-05-22T01:04:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22564","created_at":"2026-05-22T01:04:57Z"},{"alias_kind":"pith_short_12","alias_value":"OSF2LBR6UVW7","created_at":"2026-05-22T01:04:57Z"},{"alias_kind":"pith_short_16","alias_value":"OSF2LBR6UVW7G6RW","created_at":"2026-05-22T01:04:57Z"},{"alias_kind":"pith_short_8","alias_value":"OSF2LBR6","created_at":"2026-05-22T01:04:57Z"}],"graph_snapshots":[{"event_id":"sha256:cce7ab1b5337fdf78f6c3baae754a3927b84a8fbfba7bd6167df3896da437019","target":"graph","created_at":"2026-05-22T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.22564/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Today, tool-calling agents are commonly evaluated or tested on static datasets of execution traces, including input commands, agent responses, and associated tool calls. However, internal production datasets are often insufficient or unusable for testing; for example, they may contain sensitive or proprietary data, or they may be too sparse to support comprehensive testing (especially pre-deployment). In these settings, practitioners are increasingly replacing or augmenting real datasets with synthetic ones for evaluation purposes. A key challenge is quantifying the relation between these synt","authors_text":"Aadyaa Maddi, Giulia Fanti, Shuaiqi Wang, Zinan Lin","cross_cats":["cs.LG","cs.SE"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-21T14:45:02Z","title":"SynAE: A Framework for Measuring the Quality of Synthetic Data for Tool-Calling Agent Evaluations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22564","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd1640464b6b20bedd79caf6f5c889fda72cfb8b5d360d6099c5ef32ce784702","target":"record","created_at":"2026-05-22T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"879c669ccb5e12b791b7e1f288076db8a9077cee4e50f43f719820c10567736f","cross_cats_sorted":["cs.LG","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-21T14:45:02Z","title_canon_sha256":"0ede07bc048f8f649b3ecc2d174cfbbf85d2f958589977e5da8109a599397f38"},"schema_version":"1.0","source":{"id":"2605.22564","kind":"arxiv","version":1}},"canonical_sha256":"748ba5863ea56df37a36d00964b031b6bc138cbfc55e533f2ab501792ba1c645","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"748ba5863ea56df37a36d00964b031b6bc138cbfc55e533f2ab501792ba1c645","first_computed_at":"2026-05-22T01:04:57.673704Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:57.673704Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"k384pHR2TKpYofkWXd5DjQp+xlFfvdnLg9DRZFJhmxlLo/LQC2Pfi0nOskxNs4AIuUI0vulI/K6jklTBozfOAA==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:57.674504Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.22564","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd1640464b6b20bedd79caf6f5c889fda72cfb8b5d360d6099c5ef32ce784702","sha256:cce7ab1b5337fdf78f6c3baae754a3927b84a8fbfba7bd6167df3896da437019"],"state_sha256":"bf20d8381656d736a4f0fbd28a212457a99564f5e95bb59d89141f18c1325b79"}