{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:DSF2UF2RIXZFEXV2N4YC22PKB5","short_pith_number":"pith:DSF2UF2R","canonical_record":{"source":{"id":"2510.19186","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-22T02:44:11Z","cross_cats_sorted":[],"title_canon_sha256":"e8a580bb38efed3a1196d9a24db8f50698ceaea72d316a4b7aac948ddb2b7351","abstract_canon_sha256":"6816afda39263926053ffd6c1e33b0746d4f7e94fef3032d4bb4f1c471786f99"},"schema_version":"1.0"},"canonical_sha256":"1c8baa175145f2525eba6f302d69ea0f6ce23a6b90439619013cf803a0853a34","source":{"kind":"arxiv","id":"2510.19186","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.19186","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"arxiv_version","alias_value":"2510.19186v2","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.19186","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_12","alias_value":"DSF2UF2RIXZF","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_16","alias_value":"DSF2UF2RIXZFEXV2","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_8","alias_value":"DSF2UF2R","created_at":"2026-06-09T02:07:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:DSF2UF2RIXZFEXV2N4YC22PKB5","target":"record","payload":{"canonical_record":{"source":{"id":"2510.19186","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-22T02:44:11Z","cross_cats_sorted":[],"title_canon_sha256":"e8a580bb38efed3a1196d9a24db8f50698ceaea72d316a4b7aac948ddb2b7351","abstract_canon_sha256":"6816afda39263926053ffd6c1e33b0746d4f7e94fef3032d4bb4f1c471786f99"},"schema_version":"1.0"},"canonical_sha256":"1c8baa175145f2525eba6f302d69ea0f6ce23a6b90439619013cf803a0853a34","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:11.685600Z","signature_b64":"25WJ+9CeNG+vUQCNyRNtpsgwzbXWKTu8csrILiYUu0CNWAwfHsrMGH/dGzXM21E5BXVKs5mRAGwq7e+8eNT5Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1c8baa175145f2525eba6f302d69ea0f6ce23a6b90439619013cf803a0853a34","last_reissued_at":"2026-06-09T02:07:11.684717Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:11.684717Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.19186","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PvOzaMPt0q8BnVAw17lDD0ifxvf+mBczqFL6NzeBQRudQFCxA2AUx3ANpcqlZ0xZCBoY6ku2AShYg3mYW9WcDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T23:34:58.379993Z"},"content_sha256":"e3b80df6395c4378667cf0ce4ff99fa71027215af177b290e4d09df1335279b6","schema_version":"1.0","event_id":"sha256:e3b80df6395c4378667cf0ce4ff99fa71027215af177b290e4d09df1335279b6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:DSF2UF2RIXZFEXV2N4YC22PKB5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When Users Are Happy but Agents Are Wrong: Multi-Dimensional Evaluation of Tool-Augmented Dialogue","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Rashmi Gangadharaiah, Shamik Roy, Tanya Shourya, Vinayshekhar Bannihatti Kumar, Yingfan Wang, Zhaoyi Joey Hou","submitted_at":"2025-10-22T02:44:11Z","abstract_excerpt":"Evaluating conversational AI systems that use external tools is challenging, as errors can arise from complex interactions among user, agent, and tools. While existing evaluation methods assess either user satisfaction or agents' tool-calling capabilities, they fail to capture critical errors in multi-turn tool-augmented dialogues-such as when agents misinterpret tool results yet appear satisfactory to users. We introduce TRACE, a benchmark of systematically synthesized tool-augmented conversations covering diverse error cases. Evaluation with state-of-the-art conversation evaluation framework"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.19186","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.19186/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Cx37HFwpHwdKg/DQ7tdtIAa7Z3NCgb5K2CSkN1Hr/60HOfWJmF8DnBVE0RfUv+K7hFPzZKozKmTxtSzCsPlrCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T23:34:58.380719Z"},"content_sha256":"6975e5125f47873a953a7af217ccd3ce3fb35e861c19e06984cd9c6e69b72f6e","schema_version":"1.0","event_id":"sha256:6975e5125f47873a953a7af217ccd3ce3fb35e861c19e06984cd9c6e69b72f6e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/bundle.json","state_url":"https://pith.science/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T23:34:58Z","links":{"resolver":"https://pith.science/pith/DSF2UF2RIXZFEXV2N4YC22PKB5","bundle":"https://pith.science/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/bundle.json","state":"https://pith.science/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DSF2UF2RIXZFEXV2N4YC22PKB5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:DSF2UF2RIXZFEXV2N4YC22PKB5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6816afda39263926053ffd6c1e33b0746d4f7e94fef3032d4bb4f1c471786f99","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-22T02:44:11Z","title_canon_sha256":"e8a580bb38efed3a1196d9a24db8f50698ceaea72d316a4b7aac948ddb2b7351"},"schema_version":"1.0","source":{"id":"2510.19186","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.19186","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"arxiv_version","alias_value":"2510.19186v2","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.19186","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_12","alias_value":"DSF2UF2RIXZF","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_16","alias_value":"DSF2UF2RIXZFEXV2","created_at":"2026-06-09T02:07:11Z"},{"alias_kind":"pith_short_8","alias_value":"DSF2UF2R","created_at":"2026-06-09T02:07:11Z"}],"graph_snapshots":[{"event_id":"sha256:6975e5125f47873a953a7af217ccd3ce3fb35e861c19e06984cd9c6e69b72f6e","target":"graph","created_at":"2026-06-09T02:07:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.19186/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Evaluating conversational AI systems that use external tools is challenging, as errors can arise from complex interactions among user, agent, and tools. While existing evaluation methods assess either user satisfaction or agents' tool-calling capabilities, they fail to capture critical errors in multi-turn tool-augmented dialogues-such as when agents misinterpret tool results yet appear satisfactory to users. We introduce TRACE, a benchmark of systematically synthesized tool-augmented conversations covering diverse error cases. Evaluation with state-of-the-art conversation evaluation framework","authors_text":"Rashmi Gangadharaiah, Shamik Roy, Tanya Shourya, Vinayshekhar Bannihatti Kumar, Yingfan Wang, Zhaoyi Joey Hou","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-22T02:44:11Z","title":"When Users Are Happy but Agents Are Wrong: Multi-Dimensional Evaluation of Tool-Augmented Dialogue"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.19186","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e3b80df6395c4378667cf0ce4ff99fa71027215af177b290e4d09df1335279b6","target":"record","created_at":"2026-06-09T02:07:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6816afda39263926053ffd6c1e33b0746d4f7e94fef3032d4bb4f1c471786f99","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-22T02:44:11Z","title_canon_sha256":"e8a580bb38efed3a1196d9a24db8f50698ceaea72d316a4b7aac948ddb2b7351"},"schema_version":"1.0","source":{"id":"2510.19186","kind":"arxiv","version":2}},"canonical_sha256":"1c8baa175145f2525eba6f302d69ea0f6ce23a6b90439619013cf803a0853a34","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1c8baa175145f2525eba6f302d69ea0f6ce23a6b90439619013cf803a0853a34","first_computed_at":"2026-06-09T02:07:11.684717Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:11.684717Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"25WJ+9CeNG+vUQCNyRNtpsgwzbXWKTu8csrILiYUu0CNWAwfHsrMGH/dGzXM21E5BXVKs5mRAGwq7e+8eNT5Aw==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:11.685600Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.19186","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e3b80df6395c4378667cf0ce4ff99fa71027215af177b290e4d09df1335279b6","sha256:6975e5125f47873a953a7af217ccd3ce3fb35e861c19e06984cd9c6e69b72f6e"],"state_sha256":"05c3f90405defb5fdb66b7dd062b17b4d38bd74022fb6482a53c60ad6421319d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BDvVg7kXl9FM5LPnt9YZVTaYr1G3Ni+t4d+d0ZVLF4FZk/836NHdF4e2q1fyGI7c7N3EZ86v6BlewA6elB9iDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T23:34:58.384462Z","bundle_sha256":"80fd29de3bcc090b8e445cf533d5ea89030848e5f4d9225c6c4bf48456405980"}}