{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:CVVQ5U6ZAPZQOEDEYH7QLSO6PQ","short_pith_number":"pith:CVVQ5U6Z","canonical_record":{"source":{"id":"2401.10935","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2024-01-17T08:10:35Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"9c130e118c2a1c05f74f4892c3b481834ad6af8d966941736349a41f6527fb8a","abstract_canon_sha256":"c715e54ca2fc5df30d79d57a56510b383f51991a97577d6e6757f967c307f952"},"schema_version":"1.0"},"canonical_sha256":"156b0ed3d903f3071064c1ff05c9de7c107098706c7beb3b249c632e0ef6faf4","source":{"kind":"arxiv","id":"2401.10935","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2401.10935","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2401.10935v2","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2401.10935","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"CVVQ5U6ZAPZQ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"CVVQ5U6ZAPZQOEDE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"CVVQ5U6Z","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:CVVQ5U6ZAPZQOEDEYH7QLSO6PQ","target":"record","payload":{"canonical_record":{"source":{"id":"2401.10935","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2024-01-17T08:10:35Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"9c130e118c2a1c05f74f4892c3b481834ad6af8d966941736349a41f6527fb8a","abstract_canon_sha256":"c715e54ca2fc5df30d79d57a56510b383f51991a97577d6e6757f967c307f952"},"schema_version":"1.0"},"canonical_sha256":"156b0ed3d903f3071064c1ff05c9de7c107098706c7beb3b249c632e0ef6faf4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:14.419307Z","signature_b64":"VXDlMSGk/xs/Y01obyKhmgS3c6nzgZYZVhU0eyq7bv0PYAyr/lbLbpVwL2y99E6E/7H5WWRpqlUaAKNSeTVEAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"156b0ed3d903f3071064c1ff05c9de7c107098706c7beb3b249c632e0ef6faf4","last_reissued_at":"2026-05-17T23:38:14.418669Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:14.418669Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2401.10935","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pNe3nP+S7ME/a/+L1JNVCZVBJdK1u+cgUdlwo1/koKMasxNf9gei+5bmM/GVF1F4K2AexMX/nETqe8tcdPPMCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T04:22:45.078511Z"},"content_sha256":"331bd79e811fa20efc95591402df0a0fcc37f040dc00b358389634f2988d5f39","schema_version":"1.0","event_id":"sha256:331bd79e811fa20efc95591402df0a0fcc37f040dc00b358389634f2988d5f39"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:CVVQ5U6ZAPZQOEDEYH7QLSO6PQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SeeClick: Harnessing GUI Grounding for Advanced Visual GUI Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone.","cross_cats":["cs.AI"],"primary_cat":"cs.HC","authors_text":"Fangzhi Xu, Jianbing Zhang, Kanzhi Cheng, Qiushi Sun, Yantao Li, Yougang Chu, Zhiyong Wu","submitted_at":"2024-01-17T08:10:35Z","abstract_excerpt":"Graphical User Interface (GUI) agents are designed to automate complex tasks on digital devices, such as smartphones and desktops. Most existing GUI agents interact with the environment through extracted structured data, which can be notably lengthy (e.g., HTML) and occasionally inaccessible (e.g., on desktops). To alleviate this issue, we propose a novel visual GUI agent -- SeeClick, which only relies on screenshots for task automation. In our preliminary study, we have discovered a key challenge in developing visual GUI agents: GUI grounding -- the capacity to accurately locate screen elemen"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"advancements in GUI grounding directly correlate with enhanced performance in downstream GUI agent tasks","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the automatically curated GUI grounding data is sufficiently high-quality and representative to enable effective transfer to real agent tasks across environments.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"SeeClick improves visual GUI agents via GUI grounding pre-training on automatically curated data and introduces the ScreenSpot benchmark, with results indicating that stronger grounding boosts downstream task performance.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"31450ec01983bb27c2184222e9efbc8aa01f89caefedb5a5a3348063ccf3ae81"},"source":{"id":"2401.10935","kind":"arxiv","version":2},"verdict":{"id":"a13589ef-02ee-4f0f-b4b4-ed487bf180e8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T10:04:24.837735Z","strongest_claim":"advancements in GUI grounding directly correlate with enhanced performance in downstream GUI agent tasks","one_line_summary":"SeeClick improves visual GUI agents via GUI grounding pre-training on automatically curated data and introduces the ScreenSpot benchmark, with results indicating that stronger grounding boosts downstream task performance.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the automatically curated GUI grounding data is sufficiently high-quality and representative to enable effective transfer to real agent tasks across environments.","pith_extraction_headline":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone."},"references":{"count":81,"sample":[{"doi":"","year":1972,"title":"Aho and Jeffrey D","work_id":"b1f5cb43-a3c7-4ea0-85e7-9ccc9dfe1588","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1983,"title":"Publications Manual , year = \"1983\", publisher =","work_id":"aca2b566-99e0-4ebb-9c7a-a81219531259","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1145/322234.322243","year":1981,"title":"Chandra and Dexter C","work_id":"c3270592-bd69-4213-95e1-4aaf8312be9b","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Scalable training of","work_id":"aef70eae-f816-4598-84ec-429a2c09f5fc","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1997,"title":"Dan Gusfield , title =. 1997","work_id":"852d89f5-1e7b-4296-b4f2-71e578b5e9f6","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":81,"snapshot_sha256":"47ca75fa2081800e3c2abcf76335eada545a4a8ec4fd2b8a871ee32595ca72c7","internal_anchors":24},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7bb9bea7937b18735f61bdd93645b1bbe4fb4fbd19c2ddf38f9ed35cbe88971d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a13589ef-02ee-4f0f-b4b4-ed487bf180e8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I/p0r7LN/32zewu8kege2UR1M5qm96bRuDeYG8S7uLxsgQf6E5NXHAs7VizeOVtUyoJBeNJsXfqGjD05iLBSAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T04:22:45.079522Z"},"content_sha256":"a5c3350ab4b1072e4c3b7d5f33fa930a253ff8c109e4faf3d3d56402907a4659","schema_version":"1.0","event_id":"sha256:a5c3350ab4b1072e4c3b7d5f33fa930a253ff8c109e4faf3d3d56402907a4659"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/bundle.json","state_url":"https://pith.science/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-19T04:22:45Z","links":{"resolver":"https://pith.science/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ","bundle":"https://pith.science/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/bundle.json","state":"https://pith.science/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CVVQ5U6ZAPZQOEDEYH7QLSO6PQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:CVVQ5U6ZAPZQOEDEYH7QLSO6PQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c715e54ca2fc5df30d79d57a56510b383f51991a97577d6e6757f967c307f952","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2024-01-17T08:10:35Z","title_canon_sha256":"9c130e118c2a1c05f74f4892c3b481834ad6af8d966941736349a41f6527fb8a"},"schema_version":"1.0","source":{"id":"2401.10935","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2401.10935","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"arxiv_version","alias_value":"2401.10935v2","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2401.10935","created_at":"2026-05-17T23:38:14Z"},{"alias_kind":"pith_short_12","alias_value":"CVVQ5U6ZAPZQ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"CVVQ5U6ZAPZQOEDE","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"CVVQ5U6Z","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:a5c3350ab4b1072e4c3b7d5f33fa930a253ff8c109e4faf3d3d56402907a4659","target":"graph","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"advancements in GUI grounding directly correlate with enhanced performance in downstream GUI agent tasks"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the automatically curated GUI grounding data is sufficiently high-quality and representative to enable effective transfer to real agent tasks across environments."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"SeeClick improves visual GUI agents via GUI grounding pre-training on automatically curated data and introduces the ScreenSpot benchmark, with results indicating that stronger grounding boosts downstream task performance."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone."}],"snapshot_sha256":"31450ec01983bb27c2184222e9efbc8aa01f89caefedb5a5a3348063ccf3ae81"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7bb9bea7937b18735f61bdd93645b1bbe4fb4fbd19c2ddf38f9ed35cbe88971d"},"paper":{"abstract_excerpt":"Graphical User Interface (GUI) agents are designed to automate complex tasks on digital devices, such as smartphones and desktops. Most existing GUI agents interact with the environment through extracted structured data, which can be notably lengthy (e.g., HTML) and occasionally inaccessible (e.g., on desktops). To alleviate this issue, we propose a novel visual GUI agent -- SeeClick, which only relies on screenshots for task automation. In our preliminary study, we have discovered a key challenge in developing visual GUI agents: GUI grounding -- the capacity to accurately locate screen elemen","authors_text":"Fangzhi Xu, Jianbing Zhang, Kanzhi Cheng, Qiushi Sun, Yantao Li, Yougang Chu, Zhiyong Wu","cross_cats":["cs.AI"],"headline":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2024-01-17T08:10:35Z","title":"SeeClick: Harnessing GUI Grounding for Advanced Visual GUI Agents"},"references":{"count":81,"internal_anchors":24,"resolved_work":81,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Aho and Jeffrey D","work_id":"b1f5cb43-a3c7-4ea0-85e7-9ccc9dfe1588","year":1972},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Publications Manual , year = \"1983\", publisher =","work_id":"aca2b566-99e0-4ebb-9c7a-a81219531259","year":1983},{"cited_arxiv_id":"","doi":"10.1145/322234.322243","is_internal_anchor":false,"ref_index":3,"title":"Chandra and Dexter C","work_id":"c3270592-bd69-4213-95e1-4aaf8312be9b","year":1981},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Scalable training of","work_id":"aef70eae-f816-4598-84ec-429a2c09f5fc","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Dan Gusfield , title =. 1997","work_id":"852d89f5-1e7b-4296-b4f2-71e578b5e9f6","year":1997}],"snapshot_sha256":"47ca75fa2081800e3c2abcf76335eada545a4a8ec4fd2b8a871ee32595ca72c7"},"source":{"id":"2401.10935","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T10:04:24.837735Z","id":"a13589ef-02ee-4f0f-b4b4-ed487bf180e8","model_set":{"reader":"grok-4.3"},"one_line_summary":"SeeClick improves visual GUI agents via GUI grounding pre-training on automatically curated data and introduces the ScreenSpot benchmark, with results indicating that stronger grounding boosts downstream task performance.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Advancements in GUI grounding directly improve the performance of visual agents that automate tasks from screenshots alone.","strongest_claim":"advancements in GUI grounding directly correlate with enhanced performance in downstream GUI agent tasks","weakest_assumption":"That the automatically curated GUI grounding data is sufficiently high-quality and representative to enable effective transfer to real agent tasks across environments."}},"verdict_id":"a13589ef-02ee-4f0f-b4b4-ed487bf180e8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:331bd79e811fa20efc95591402df0a0fcc37f040dc00b358389634f2988d5f39","target":"record","created_at":"2026-05-17T23:38:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c715e54ca2fc5df30d79d57a56510b383f51991a97577d6e6757f967c307f952","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2024-01-17T08:10:35Z","title_canon_sha256":"9c130e118c2a1c05f74f4892c3b481834ad6af8d966941736349a41f6527fb8a"},"schema_version":"1.0","source":{"id":"2401.10935","kind":"arxiv","version":2}},"canonical_sha256":"156b0ed3d903f3071064c1ff05c9de7c107098706c7beb3b249c632e0ef6faf4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"156b0ed3d903f3071064c1ff05c9de7c107098706c7beb3b249c632e0ef6faf4","first_computed_at":"2026-05-17T23:38:14.418669Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:14.418669Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VXDlMSGk/xs/Y01obyKhmgS3c6nzgZYZVhU0eyq7bv0PYAyr/lbLbpVwL2y99E6E/7H5WWRpqlUaAKNSeTVEAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:14.419307Z","signed_message":"canonical_sha256_bytes"},"source_id":"2401.10935","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:331bd79e811fa20efc95591402df0a0fcc37f040dc00b358389634f2988d5f39","sha256:a5c3350ab4b1072e4c3b7d5f33fa930a253ff8c109e4faf3d3d56402907a4659"],"state_sha256":"3fc1cd828d09db0768c7c7adc4bfccf067b714eb338bb47e72eaad740b754bbb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZrWKZtSEQx/jqhSnSTHlVzT90a8qYyc5kMgC+tYiWbAru/sKYECR1ZDiaI816tzIc5bm0p5TaZDb3fVEjLHvCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-19T04:22:45.084463Z","bundle_sha256":"79e9b76ab23a4623e021556081dda562a0c7017fb9da0586ce38526268d978d0"}}