{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BE7XFALLKYMDFAE7ILRJBOXN77","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e84420c6664928768a56f466e66a2fb803252785eb7dcc9b8398f70cfa1ac3f7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-09T16:32:14Z","title_canon_sha256":"8f140880c9647e685c0db5563553a5460c06f5f39003f6b3d995868f2b755ee6"},"schema_version":"1.0","source":{"id":"2606.11070","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11070","created_at":"2026-06-10T01:11:08Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11070v1","created_at":"2026-06-10T01:11:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11070","created_at":"2026-06-10T01:11:08Z"},{"alias_kind":"pith_short_12","alias_value":"BE7XFALLKYMD","created_at":"2026-06-10T01:11:08Z"},{"alias_kind":"pith_short_16","alias_value":"BE7XFALLKYMDFAE7","created_at":"2026-06-10T01:11:08Z"},{"alias_kind":"pith_short_8","alias_value":"BE7XFALL","created_at":"2026-06-10T01:11:08Z"}],"graph_snapshots":[{"event_id":"sha256:57997b98a224bb0bc7671f998821fde65ae26a2e4e99bc6e64292ba5bed25d46","target":"graph","created_at":"2026-06-10T01:11:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11070/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advances in reasoning and tool-calling capabilities of large language models (LLMs) have enabled increasingly capable agentic systems. However, existing benchmarks remain limited in task complexity, realism, and domain diversity, and often fail to capture interactions that span multiple domains, limiting their ability to evaluate agents in realistic multi-step settings that require sustained reasoning and coordination. To address these limitations, we introduce T1-Bench, a high-fidelity, comprehensive benchmark for evaluating agentic systems in realistic customer-facing, multi-domain en","authors_text":"Amartya Chakraborty, Anirban Das, Anmol Jain, Genta Indra Winata, Houhan Lu, Kshitij Tayal, Nadia Bathaee, Paresh Dashore, Sambit Sahu, Shikhhar Siingh, Shi-Xiong Zhang, Sriharsha Hatwar, Swasthi P Rao, Xiuzhu Lin, Yuzhen Lin","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-09T16:32:14Z","title":"T1-Bench: Benchmarking Multi-Scenario Agents in Real-World Domains"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11070","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:774b7a650ade0aa65a9a8f99306eb85289fa0e2aca7946c63260fd46e59660c4","target":"record","created_at":"2026-06-10T01:11:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e84420c6664928768a56f466e66a2fb803252785eb7dcc9b8398f70cfa1ac3f7","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-09T16:32:14Z","title_canon_sha256":"8f140880c9647e685c0db5563553a5460c06f5f39003f6b3d995868f2b755ee6"},"schema_version":"1.0","source":{"id":"2606.11070","kind":"arxiv","version":1}},"canonical_sha256":"093f72816b561832809f42e290baedfff0744c99708d106bad4bdf313d08a4b2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"093f72816b561832809f42e290baedfff0744c99708d106bad4bdf313d08a4b2","first_computed_at":"2026-06-10T01:11:08.267209Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-10T01:11:08.267209Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tkHyYpDMxgv1DwMvedbf/Knoha5kEETeUBBO21Sl2RdiYUt3eVY1WOhUC2DZ3odb/tH0uQjfU/yeN35UQY1gDA==","signature_status":"signed_v1","signed_at":"2026-06-10T01:11:08.268340Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11070","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:774b7a650ade0aa65a9a8f99306eb85289fa0e2aca7946c63260fd46e59660c4","sha256:57997b98a224bb0bc7671f998821fde65ae26a2e4e99bc6e64292ba5bed25d46"],"state_sha256":"327f3973a245fd04422452588e1d621fb90d658e477606f6105735e85dc3c6dc"}