{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:Y53NXWCH3VFGPI7AGZG3W4DBNH","short_pith_number":"pith:Y53NXWCH","canonical_record":{"source":{"id":"2605.16551","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T18:50:43Z","cross_cats_sorted":[],"title_canon_sha256":"3be0e043c42841a7ce3814004dd3002f0172f4626484e44c88fd2c8915a59879","abstract_canon_sha256":"411f6e69592f563c30f8d368d151b7173fc851bb54926bb8966dac1ed0208a9a"},"schema_version":"1.0"},"canonical_sha256":"c776dbd847dd4a67a3e0364dbb706169c69955208864b54c1b5c23311ab2319e","source":{"kind":"arxiv","id":"2605.16551","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16551","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16551v1","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16551","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_12","alias_value":"Y53NXWCH3VFG","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_16","alias_value":"Y53NXWCH3VFGPI7A","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_8","alias_value":"Y53NXWCH","created_at":"2026-05-20T00:02:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:Y53NXWCH3VFGPI7AGZG3W4DBNH","target":"record","payload":{"canonical_record":{"source":{"id":"2605.16551","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T18:50:43Z","cross_cats_sorted":[],"title_canon_sha256":"3be0e043c42841a7ce3814004dd3002f0172f4626484e44c88fd2c8915a59879","abstract_canon_sha256":"411f6e69592f563c30f8d368d151b7173fc851bb54926bb8966dac1ed0208a9a"},"schema_version":"1.0"},"canonical_sha256":"c776dbd847dd4a67a3e0364dbb706169c69955208864b54c1b5c23311ab2319e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:28.787427Z","signature_b64":"KFwVKI7+o+U8ZlMJSgC+Z2wwnk1txQ9zI4C0n/od+tl57i8Strb9IdzKDiXgnLOQnlBi5op1nqQ4YoyucKQaDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c776dbd847dd4a67a3e0364dbb706169c69955208864b54c1b5c23311ab2319e","last_reissued_at":"2026-05-20T00:02:28.786599Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:28.786599Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.16551","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h6ZmzY44SqkRNQ3c9jg3ml3OETEo2UhZbHoy5ahX48a8CwwpM4tqzBC5LDbBVlp1y2kVwfqHYDJhBifLjbjkBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T13:27:33.038247Z"},"content_sha256":"44266ee2055e5f96b5614dd002190b25397ffec626b70e6bc55eddfec0744710","schema_version":"1.0","event_id":"sha256:44266ee2055e5f96b5614dd002190b25397ffec626b70e6bc55eddfec0744710"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:Y53NXWCH3VFGPI7AGZG3W4DBNH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"PQR: A Framework to Generate Diverse and Realistic User Queries that Elicit QA Agent Failures","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Arpit Sharma, Luigi Liu, Omar Yahia, Yunan Lu, Zhou Yu","submitted_at":"2026-05-15T18:50:43Z","abstract_excerpt":"Evaluating LLM-based agents remains challenging because identifying meaningful failure cases often requires substantial human effort to design realistic test scenarios. Prior works primarily focus on automatically discovering agent failures induced by adversarial users, while overlooking queries with real user intents that also trigger agent failures. We introduce PQR, a framework that not only surfaces agent failures with respect to specific objectives (e.g., helpfulness, safety, etc.) but also resembles real users' intents. PQR operates through an iterative interaction between two complement"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16551","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16551/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T19:21:56.899613Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.633461Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"e02e30d67637414f816e83407626c4a63eb7c7c8c8213f303bd2ef7d9cb5ddb0"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:28Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vfutpCAcEnlIUenj1SUIekvdbxAP4x0U5b6BQDbut+xFHzoXIXtnqalEvxNcPQf3EGOjVjrC6a2ZotRYDy16CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T13:27:33.038793Z"},"content_sha256":"dd3fe60383d10cd76636c185b703046702eb394aa7fced2c23d7d9780cd929c1","schema_version":"1.0","event_id":"sha256:dd3fe60383d10cd76636c185b703046702eb394aa7fced2c23d7d9780cd929c1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/bundle.json","state_url":"https://pith.science/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T13:27:33Z","links":{"resolver":"https://pith.science/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH","bundle":"https://pith.science/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/bundle.json","state":"https://pith.science/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/Y53NXWCH3VFGPI7AGZG3W4DBNH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:Y53NXWCH3VFGPI7AGZG3W4DBNH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"411f6e69592f563c30f8d368d151b7173fc851bb54926bb8966dac1ed0208a9a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T18:50:43Z","title_canon_sha256":"3be0e043c42841a7ce3814004dd3002f0172f4626484e44c88fd2c8915a59879"},"schema_version":"1.0","source":{"id":"2605.16551","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16551","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16551v1","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16551","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_12","alias_value":"Y53NXWCH3VFG","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_16","alias_value":"Y53NXWCH3VFGPI7A","created_at":"2026-05-20T00:02:28Z"},{"alias_kind":"pith_short_8","alias_value":"Y53NXWCH","created_at":"2026-05-20T00:02:28Z"}],"graph_snapshots":[{"event_id":"sha256:dd3fe60383d10cd76636c185b703046702eb394aa7fced2c23d7d9780cd929c1","target":"graph","created_at":"2026-05-20T00:02:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T19:21:56.899613Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.633461Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.16551/integrity.json","findings":[],"snapshot_sha256":"e02e30d67637414f816e83407626c4a63eb7c7c8c8213f303bd2ef7d9cb5ddb0","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Evaluating LLM-based agents remains challenging because identifying meaningful failure cases often requires substantial human effort to design realistic test scenarios. Prior works primarily focus on automatically discovering agent failures induced by adversarial users, while overlooking queries with real user intents that also trigger agent failures. We introduce PQR, a framework that not only surfaces agent failures with respect to specific objectives (e.g., helpfulness, safety, etc.) but also resembles real users' intents. PQR operates through an iterative interaction between two complement","authors_text":"Arpit Sharma, Luigi Liu, Omar Yahia, Yunan Lu, Zhou Yu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T18:50:43Z","title":"PQR: A Framework to Generate Diverse and Realistic User Queries that Elicit QA Agent Failures"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16551","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:44266ee2055e5f96b5614dd002190b25397ffec626b70e6bc55eddfec0744710","target":"record","created_at":"2026-05-20T00:02:28Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"411f6e69592f563c30f8d368d151b7173fc851bb54926bb8966dac1ed0208a9a","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-15T18:50:43Z","title_canon_sha256":"3be0e043c42841a7ce3814004dd3002f0172f4626484e44c88fd2c8915a59879"},"schema_version":"1.0","source":{"id":"2605.16551","kind":"arxiv","version":1}},"canonical_sha256":"c776dbd847dd4a67a3e0364dbb706169c69955208864b54c1b5c23311ab2319e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c776dbd847dd4a67a3e0364dbb706169c69955208864b54c1b5c23311ab2319e","first_computed_at":"2026-05-20T00:02:28.786599Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:28.786599Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KFwVKI7+o+U8ZlMJSgC+Z2wwnk1txQ9zI4C0n/od+tl57i8Strb9IdzKDiXgnLOQnlBi5op1nqQ4YoyucKQaDQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:28.787427Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.16551","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:44266ee2055e5f96b5614dd002190b25397ffec626b70e6bc55eddfec0744710","sha256:dd3fe60383d10cd76636c185b703046702eb394aa7fced2c23d7d9780cd929c1"],"state_sha256":"26b8a66a092fbf85ef784618638598f75cbfeaa4f8fdeb80bd723564f73188e9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lxiZ7QoYg8yqyfK3ANmkkrJyIVEJkQnMy8hLBZRsUhju86QUtahKKruQjd3Jw01YdSp++UyH6qILtAGWP/LrCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T13:27:33.042473Z","bundle_sha256":"a4ce96b5d687e8001e324b386911f0bd326cd1f1c4284d2aae563d353ae797eb"}}