{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:GN52DZPLYTTRWT7O6V3DPQ53Y7","short_pith_number":"pith:GN52DZPL","schema_version":"1.0","canonical_sha256":"337ba1e5ebc4e71b4feef57637c3bbc7c72d41b3c3b7792f5d7b49bc9626af58","source":{"kind":"arxiv","id":"2606.03889","version":1},"attestation_state":"computed","paper":{"title":"RealClawBench: Live OpenClaw Benchmarks from Real Developer-Agent Sessions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Guangxiang Zhao, Lin Sun, Tong Yang, Weihong Lin, Xiangzheng Zhang, Yaoming Li, Yilun Yao, Yuxuan Tian, Zhewen Tan, Zongwei Lv","submitted_at":"2026-06-02T16:51:24Z","abstract_excerpt":"Agent benchmarks should reflect what users actually ask deployed agents to do, yet existing benchmarks often miss key realism properties of real developer-agent sessions. We introduce RealClawBench, a live benchmark framework built from real OpenClaw sessions to capture the distribution, diversity, and real-world difficulty of deployed agent use. Real user requests are challenging to benchmark because they often depend on local execution environments, involve implicit or underspecified intent, and require nontrivial verification. RealClawBench addresses these challenges with two core mechanism"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.03889","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T16:51:24Z","cross_cats_sorted":[],"title_canon_sha256":"5b87d9e8534d9006eb5695dc83f588f0036861740fc569eb341f831897a03f9d","abstract_canon_sha256":"07a1fa83c5d171e1ee02e2fb109e50714a24201159b2077009d9b7ef845e1d6a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:06:06.085337Z","signature_b64":"fGECfBxTje7qZ/zPdWPDwRNoH4RGrS/XIQFCDcumW+Cv4mg202pBdUQh1JP/2l7lpw71RaO1+e6p0vJoaOHaCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"337ba1e5ebc4e71b4feef57637c3bbc7c72d41b3c3b7792f5d7b49bc9626af58","last_reissued_at":"2026-06-03T02:06:06.084912Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:06:06.084912Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RealClawBench: Live OpenClaw Benchmarks from Real Developer-Agent Sessions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Guangxiang Zhao, Lin Sun, Tong Yang, Weihong Lin, Xiangzheng Zhang, Yaoming Li, Yilun Yao, Yuxuan Tian, Zhewen Tan, Zongwei Lv","submitted_at":"2026-06-02T16:51:24Z","abstract_excerpt":"Agent benchmarks should reflect what users actually ask deployed agents to do, yet existing benchmarks often miss key realism properties of real developer-agent sessions. We introduce RealClawBench, a live benchmark framework built from real OpenClaw sessions to capture the distribution, diversity, and real-world difficulty of deployed agent use. Real user requests are challenging to benchmark because they often depend on local execution environments, involve implicit or underspecified intent, and require nontrivial verification. RealClawBench addresses these challenges with two core mechanism"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03889","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03889/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.03889","created_at":"2026-06-03T02:06:06.084970+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.03889v1","created_at":"2026-06-03T02:06:06.084970+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03889","created_at":"2026-06-03T02:06:06.084970+00:00"},{"alias_kind":"pith_short_12","alias_value":"GN52DZPLYTTR","created_at":"2026-06-03T02:06:06.084970+00:00"},{"alias_kind":"pith_short_16","alias_value":"GN52DZPLYTTRWT7O","created_at":"2026-06-03T02:06:06.084970+00:00"},{"alias_kind":"pith_short_8","alias_value":"GN52DZPL","created_at":"2026-06-03T02:06:06.084970+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7","json":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7.json","graph_json":"https://pith.science/api/pith-number/GN52DZPLYTTRWT7O6V3DPQ53Y7/graph.json","events_json":"https://pith.science/api/pith-number/GN52DZPLYTTRWT7O6V3DPQ53Y7/events.json","paper":"https://pith.science/paper/GN52DZPL"},"agent_actions":{"view_html":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7","download_json":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7.json","view_paper":"https://pith.science/paper/GN52DZPL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.03889&json=true","fetch_graph":"https://pith.science/api/pith-number/GN52DZPLYTTRWT7O6V3DPQ53Y7/graph.json","fetch_events":"https://pith.science/api/pith-number/GN52DZPLYTTRWT7O6V3DPQ53Y7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7/action/storage_attestation","attest_author":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7/action/author_attestation","sign_citation":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7/action/citation_signature","submit_replication":"https://pith.science/pith/GN52DZPLYTTRWT7O6V3DPQ53Y7/action/replication_record"}},"created_at":"2026-06-03T02:06:06.084970+00:00","updated_at":"2026-06-03T02:06:06.084970+00:00"}