{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CU6ENH3VJ3T2GSWRF76IH727P4","short_pith_number":"pith:CU6ENH3V","schema_version":"1.0","canonical_sha256":"153c469f754ee7a34ad12ffc83ff5f7f17a21fdb7b1b0f521839b5c5adb1d736","source":{"kind":"arxiv","id":"2605.27761","version":1},"attestation_state":"computed","paper":{"title":"AndroidDaily: A Verifiable Benchmark for Mobile GUI Agents on Real-World Closed-Source Applications","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SE"],"primary_cat":"cs.CV","authors_text":"Daxin Jiang, Fang Xu, Haolong Yan, Hongbing Li, Jiahe Lv, Jia Wang, Junbo Qi, Kaijun Tan, Litao Liu, Osamu Yoshie, Xiangyu Zhang, Xin Huang, Yeqing Shen, Yifan Sui, Zheng Ge, Zhimin Fan, Ziyang Meng","submitted_at":"2026-05-26T23:19:42Z","abstract_excerpt":"The rapid development of GUI foundation models and mobile GUI agents has spurred numerous evaluation benchmarks, yet most rely on simulated environments or open-source applications, leaving real-world closed-source applications largely unevaluated. The core difficulty is that closed-source applications do not expose internal states, making traditional automatic verification inapplicable. To bridge this gap, we introduce AndroidDaily, a large-scale benchmark comprising 350 realistic daily-use tasks across 94 high-frequency Android applications spanning transportation, shopping, local services, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.27761","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T23:19:42Z","cross_cats_sorted":["cs.SE"],"title_canon_sha256":"52216c3862961d0297872ff3ae1b5604356568f03f0f545df7bc009606ea6922","abstract_canon_sha256":"02f989d4cca0fe1914df0237995b1cda1d62a83ae13de4a13e50b8c9d71b9c21"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:48.238064Z","signature_b64":"zPrhRSsN9drPlRX6nq7NO9UKXZvvWI6+SLaYIfaR5oI/VCNH1R0HjUhmkcH12nPiSgqa6l/d/yqzVqcyQ6u7Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"153c469f754ee7a34ad12ffc83ff5f7f17a21fdb7b1b0f521839b5c5adb1d736","last_reissued_at":"2026-05-28T01:04:48.237662Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:48.237662Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"AndroidDaily: A Verifiable Benchmark for Mobile GUI Agents on Real-World Closed-Source Applications","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.SE"],"primary_cat":"cs.CV","authors_text":"Daxin Jiang, Fang Xu, Haolong Yan, Hongbing Li, Jiahe Lv, Jia Wang, Junbo Qi, Kaijun Tan, Litao Liu, Osamu Yoshie, Xiangyu Zhang, Xin Huang, Yeqing Shen, Yifan Sui, Zheng Ge, Zhimin Fan, Ziyang Meng","submitted_at":"2026-05-26T23:19:42Z","abstract_excerpt":"The rapid development of GUI foundation models and mobile GUI agents has spurred numerous evaluation benchmarks, yet most rely on simulated environments or open-source applications, leaving real-world closed-source applications largely unevaluated. The core difficulty is that closed-source applications do not expose internal states, making traditional automatic verification inapplicable. To bridge this gap, we introduce AndroidDaily, a large-scale benchmark comprising 350 realistic daily-use tasks across 94 high-frequency Android applications spanning transportation, shopping, local services, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27761","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27761/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.27761","created_at":"2026-05-28T01:04:48.237717+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.27761v1","created_at":"2026-05-28T01:04:48.237717+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27761","created_at":"2026-05-28T01:04:48.237717+00:00"},{"alias_kind":"pith_short_12","alias_value":"CU6ENH3VJ3T2","created_at":"2026-05-28T01:04:48.237717+00:00"},{"alias_kind":"pith_short_16","alias_value":"CU6ENH3VJ3T2GSWR","created_at":"2026-05-28T01:04:48.237717+00:00"},{"alias_kind":"pith_short_8","alias_value":"CU6ENH3V","created_at":"2026-05-28T01:04:48.237717+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4","json":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4.json","graph_json":"https://pith.science/api/pith-number/CU6ENH3VJ3T2GSWRF76IH727P4/graph.json","events_json":"https://pith.science/api/pith-number/CU6ENH3VJ3T2GSWRF76IH727P4/events.json","paper":"https://pith.science/paper/CU6ENH3V"},"agent_actions":{"view_html":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4","download_json":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4.json","view_paper":"https://pith.science/paper/CU6ENH3V","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.27761&json=true","fetch_graph":"https://pith.science/api/pith-number/CU6ENH3VJ3T2GSWRF76IH727P4/graph.json","fetch_events":"https://pith.science/api/pith-number/CU6ENH3VJ3T2GSWRF76IH727P4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4/action/storage_attestation","attest_author":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4/action/author_attestation","sign_citation":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4/action/citation_signature","submit_replication":"https://pith.science/pith/CU6ENH3VJ3T2GSWRF76IH727P4/action/replication_record"}},"created_at":"2026-05-28T01:04:48.237717+00:00","updated_at":"2026-05-28T01:04:48.237717+00:00"}