{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:OCCWOOYXI766AV6QO6JEUL4QAH","short_pith_number":"pith:OCCWOOYX","canonical_record":{"source":{"id":"2507.05791","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-07-08T08:52:18Z","cross_cats_sorted":[],"title_canon_sha256":"d43367af695db65a4d1a5833e0216bd06fe05d9ab9e715e5b69b51fc453af1b3","abstract_canon_sha256":"721ce7dfa5e9d17b05a9e709d15219d1ea0c55acc4df42941349a814e2152ec5"},"schema_version":"1.0"},"canonical_sha256":"7085673b1747fde057d077924a2f9001fdd867ceb0215ddc24e8cbccd1d53a5f","source":{"kind":"arxiv","id":"2507.05791","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.05791","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"arxiv_version","alias_value":"2507.05791v5","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.05791","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"pith_short_12","alias_value":"OCCWOOYXI766","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"OCCWOOYXI766AV6Q","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"OCCWOOYX","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:OCCWOOYXI766AV6QO6JEUL4QAH","target":"record","payload":{"canonical_record":{"source":{"id":"2507.05791","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-07-08T08:52:18Z","cross_cats_sorted":[],"title_canon_sha256":"d43367af695db65a4d1a5833e0216bd06fe05d9ab9e715e5b69b51fc453af1b3","abstract_canon_sha256":"721ce7dfa5e9d17b05a9e709d15219d1ea0c55acc4df42941349a814e2152ec5"},"schema_version":"1.0"},"canonical_sha256":"7085673b1747fde057d077924a2f9001fdd867ceb0215ddc24e8cbccd1d53a5f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:13.906463Z","signature_b64":"KSBmJopFo4WiefBW5eWSHEcZTnd7TGEcem2d6mUI+hp9GakToDcbu0gX5/3Bvcaad07w04qyDcYwzdOe7rf3Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7085673b1747fde057d077924a2f9001fdd867ceb0215ddc24e8cbccd1d53a5f","last_reissued_at":"2026-05-17T23:38:13.905780Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:13.905780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2507.05791","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZS3hWJD6p9eQ5xIPDwGN4n9os7u0l70hCysIHw31/F2m8zfgwO856TE7t67dmqfKHCDbo72697u1eCtsIjurBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T10:30:41.329961Z"},"content_sha256":"472318e5070e391716abc938be87b73f5e06551501d405a2f8f6e87a04e735e9","schema_version":"1.0","event_id":"sha256:472318e5070e391716abc938be87b73f5e06551501d405a2f8f6e87a04e735e9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:OCCWOOYXI766AV6QO6JEUL4QAH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"GTA1: GUI Test-time Scaling Agent","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Amrita Saha, Caiming Xiong, Dongxu Li, Junnan Li, Junzhe Huang, Liyuan Pan, Ran Xu, Silvio Savarese, Yan Yang, Yuhao Yang, Yutong Dai, Zeyuan Chen, Zhiyuan Hu, Zirui Zhao, Ziyang Luo","submitted_at":"2025-07-08T08:52:18Z","abstract_excerpt":"Graphical user interface (GUI) agents autonomously complete tasks across platforms (\\eg, Linux) by sequentially decomposing user instructions into action proposals that iteratively interact with visual elements in the evolving environment. However, two main challenges arise: i) planning (\\ie, the action proposal sequence) under expansive action space, where selecting an appropriate plan is non-trivial, as many valid ones may exist; ii) accurately grounding actions in complex and high-resolution interfaces, \\ie, precisely interacting with visual targets. This paper investigates the aforemention"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"GTA1 achieves state-of-the-art performance on both grounding and agent task execution benchmarks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"A judge model can reliably identify the best action proposal among multiple samples without introducing systematic errors or bias.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"GTA1 combines test-time scaling for action plan selection with RL-based grounding to achieve SOTA results on GUI agent benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d922dea575d2a47ed05ddaee6cc04bb78f1ca28afcb1813a1a808335b6e3aa83"},"source":{"id":"2507.05791","kind":"arxiv","version":5},"verdict":{"id":"00d967e9-ef12-4245-ad95-308d41c6191d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T13:49:50.823266Z","strongest_claim":"GTA1 achieves state-of-the-art performance on both grounding and agent task execution benchmarks.","one_line_summary":"GTA1 combines test-time scaling for action plan selection with RL-based grounding to achieve SOTA results on GUI agent benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"A judge model can reliably identify the best action proposal among multiple samples without introducing systematic errors or bias.","pith_extraction_headline":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents."},"references":{"count":46,"sample":[{"doi":"","year":2024,"title":"Aria-ui: Visual grounding for gui instruc- tions","work_id":"c8c5855d-0bc7-4633-9657-01ee8050d81b","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents","work_id":"9def1724-6fd2-4d5b-8339-4c1ee76e62f8","ref_index":2,"cited_arxiv_id":"2410.05243","is_internal_anchor":true},{"doi":"","year":2025,"title":"Screenspot-pro: Gui grounding for professional high- resolution computer use.arXiv, abs/2504.07981","work_id":"013f64c6-243e-4f27-82e1-75d82d566552","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"OS-ATLAS: A Foundation Action Model for Generalist GUI Agents","work_id":"16e00be2-1641-403c-8835-c50a6628f483","ref_index":4,"cited_arxiv_id":"2410.23218","is_internal_anchor":true},{"doi":"","year":2024,"title":"SeeClick: Harnessing GUI Grounding for Advanced Visual GUI Agents","work_id":"8fe50425-9d6d-4080-bd43-51b3d0d0e5f6","ref_index":5,"cited_arxiv_id":"2401.10935","is_internal_anchor":true}],"resolved_work":46,"snapshot_sha256":"5784d51e294067447834388dec7de7ccbe9148a0ad45cb9c690220945858497c","internal_anchors":12},"formal_canon":{"evidence_count":2,"snapshot_sha256":"33f98ca9a9b8e25eeb0e9002590de99626964040c14f12692c49f06a40da860a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"00d967e9-ef12-4245-ad95-308d41c6191d"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ChmvTESVz+wPKCW7+CBdZd+z5QC5mBzpEnOJEUcFakMHVpowt5rVASEN+rUJQmJVoM3i4zYKGY1yOkuFe978CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T10:30:41.330995Z"},"content_sha256":"d897c7c7b86b9898b138cb6d1ebaa52e42594a429d3a2207937d4f401975073e","schema_version":"1.0","event_id":"sha256:d897c7c7b86b9898b138cb6d1ebaa52e42594a429d3a2207937d4f401975073e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OCCWOOYXI766AV6QO6JEUL4QAH/bundle.json","state_url":"https://pith.science/pith/OCCWOOYXI766AV6QO6JEUL4QAH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OCCWOOYXI766AV6QO6JEUL4QAH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T10:30:41Z","links":{"resolver":"https://pith.science/pith/OCCWOOYXI766AV6QO6JEUL4QAH","bundle":"https://pith.science/pith/OCCWOOYXI766AV6QO6JEUL4QAH/bundle.json","state":"https://pith.science/pith/OCCWOOYXI766AV6QO6JEUL4QAH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OCCWOOYXI766AV6QO6JEUL4QAH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:OCCWOOYXI766AV6QO6JEUL4QAH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"721ce7dfa5e9d17b05a9e709d15219d1ea0c55acc4df42941349a814e2152ec5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-07-08T08:52:18Z","title_canon_sha256":"d43367af695db65a4d1a5833e0216bd06fe05d9ab9e715e5b69b51fc453af1b3"},"schema_version":"1.0","source":{"id":"2507.05791","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.05791","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"arxiv_version","alias_value":"2507.05791v5","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.05791","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"pith_short_12","alias_value":"OCCWOOYXI766","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"OCCWOOYXI766AV6Q","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"OCCWOOYX","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:d897c7c7b86b9898b138cb6d1ebaa52e42594a429d3a2207937d4f401975073e","target":"graph","created_at":"2026-05-17T23:38:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"GTA1 achieves state-of-the-art performance on both grounding and agent task execution benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"A judge model can reliably identify the best action proposal among multiple samples without introducing systematic errors or bias."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"GTA1 combines test-time scaling for action plan selection with RL-based grounding to achieve SOTA results on GUI agent benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents."}],"snapshot_sha256":"d922dea575d2a47ed05ddaee6cc04bb78f1ca28afcb1813a1a808335b6e3aa83"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"33f98ca9a9b8e25eeb0e9002590de99626964040c14f12692c49f06a40da860a"},"paper":{"abstract_excerpt":"Graphical user interface (GUI) agents autonomously complete tasks across platforms (\\eg, Linux) by sequentially decomposing user instructions into action proposals that iteratively interact with visual elements in the evolving environment. However, two main challenges arise: i) planning (\\ie, the action proposal sequence) under expansive action space, where selecting an appropriate plan is non-trivial, as many valid ones may exist; ii) accurately grounding actions in complex and high-resolution interfaces, \\ie, precisely interacting with visual targets. This paper investigates the aforemention","authors_text":"Amrita Saha, Caiming Xiong, Dongxu Li, Junnan Li, Junzhe Huang, Liyuan Pan, Ran Xu, Silvio Savarese, Yan Yang, Yuhao Yang, Yutong Dai, Zeyuan Chen, Zhiyuan Hu, Zirui Zhao, Ziyang Luo","cross_cats":[],"headline":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-07-08T08:52:18Z","title":"GTA1: GUI Test-time Scaling Agent"},"references":{"count":46,"internal_anchors":12,"resolved_work":46,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Aria-ui: Visual grounding for gui instruc- tions","work_id":"c8c5855d-0bc7-4633-9657-01ee8050d81b","year":2024},{"cited_arxiv_id":"2410.05243","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents","work_id":"9def1724-6fd2-4d5b-8339-4c1ee76e62f8","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Screenspot-pro: Gui grounding for professional high- resolution computer use.arXiv, abs/2504.07981","work_id":"013f64c6-243e-4f27-82e1-75d82d566552","year":2025},{"cited_arxiv_id":"2410.23218","doi":"","is_internal_anchor":true,"ref_index":4,"title":"OS-ATLAS: A Foundation Action Model for Generalist GUI Agents","work_id":"16e00be2-1641-403c-8835-c50a6628f483","year":2024},{"cited_arxiv_id":"2401.10935","doi":"","is_internal_anchor":true,"ref_index":5,"title":"SeeClick: Harnessing GUI Grounding for Advanced Visual GUI Agents","work_id":"8fe50425-9d6d-4080-bd43-51b3d0d0e5f6","year":2024}],"snapshot_sha256":"5784d51e294067447834388dec7de7ccbe9148a0ad45cb9c690220945858497c"},"source":{"id":"2507.05791","kind":"arxiv","version":5},"verdict":{"created_at":"2026-05-17T13:49:50.823266Z","id":"00d967e9-ef12-4245-ad95-308d41c6191d","model_set":{"reader":"grok-4.3"},"one_line_summary":"GTA1 combines test-time scaling for action plan selection with RL-based grounding to achieve SOTA results on GUI agent benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"GTA1 uses test-time scaling to select optimal action proposals and reinforcement learning to enhance visual grounding for GUI agents.","strongest_claim":"GTA1 achieves state-of-the-art performance on both grounding and agent task execution benchmarks.","weakest_assumption":"A judge model can reliably identify the best action proposal among multiple samples without introducing systematic errors or bias."}},"verdict_id":"00d967e9-ef12-4245-ad95-308d41c6191d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:472318e5070e391716abc938be87b73f5e06551501d405a2f8f6e87a04e735e9","target":"record","created_at":"2026-05-17T23:38:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"721ce7dfa5e9d17b05a9e709d15219d1ea0c55acc4df42941349a814e2152ec5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-07-08T08:52:18Z","title_canon_sha256":"d43367af695db65a4d1a5833e0216bd06fe05d9ab9e715e5b69b51fc453af1b3"},"schema_version":"1.0","source":{"id":"2507.05791","kind":"arxiv","version":5}},"canonical_sha256":"7085673b1747fde057d077924a2f9001fdd867ceb0215ddc24e8cbccd1d53a5f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7085673b1747fde057d077924a2f9001fdd867ceb0215ddc24e8cbccd1d53a5f","first_computed_at":"2026-05-17T23:38:13.905780Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:13.905780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KSBmJopFo4WiefBW5eWSHEcZTnd7TGEcem2d6mUI+hp9GakToDcbu0gX5/3Bvcaad07w04qyDcYwzdOe7rf3Bw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:13.906463Z","signed_message":"canonical_sha256_bytes"},"source_id":"2507.05791","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:472318e5070e391716abc938be87b73f5e06551501d405a2f8f6e87a04e735e9","sha256:d897c7c7b86b9898b138cb6d1ebaa52e42594a429d3a2207937d4f401975073e"],"state_sha256":"d3a3bbf8191efb396b7a3c5a038e54bc26d3381796fe281feb3443eeedcf699e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MAHRfBK1fG411kfuETCeOxzQY+OWdTqwnPFY7TZVTD6WJSQ8MqWsPIalDnWF+yo1IS9QAaFWx3cARusCXb0xCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T10:30:41.335990Z","bundle_sha256":"47f66df39526e0b6227330b865453cb9e657a91327f2ecac888cfa1061074b35"}}