{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:QMNI4LRTJL6LOOITEAK5OCLW7W","short_pith_number":"pith:QMNI4LRT","canonical_record":{"source":{"id":"2509.21552","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-09-25T20:38:01Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6e7428599cfad04afbd7a1d9e53e83b5b587b5ce7378657efe6a3b7dcab289f1","abstract_canon_sha256":"9b35883c64595c95709f0fbf0a978a390a3049142145fad5b78f8f81c95e03fc"},"schema_version":"1.0"},"canonical_sha256":"831a8e2e334afcb739132015d70976fdadbe4be07b31c337a9fe286d353cebfd","source":{"kind":"arxiv","id":"2509.21552","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.21552","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2509.21552v2","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.21552","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"QMNI4LRTJL6L","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"QMNI4LRTJL6LOOIT","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"QMNI4LRT","created_at":"2026-05-27T01:04:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:QMNI4LRTJL6LOOITEAK5OCLW7W","target":"record","payload":{"canonical_record":{"source":{"id":"2509.21552","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-09-25T20:38:01Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6e7428599cfad04afbd7a1d9e53e83b5b587b5ce7378657efe6a3b7dcab289f1","abstract_canon_sha256":"9b35883c64595c95709f0fbf0a978a390a3049142145fad5b78f8f81c95e03fc"},"schema_version":"1.0"},"canonical_sha256":"831a8e2e334afcb739132015d70976fdadbe4be07b31c337a9fe286d353cebfd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:04:51.056402Z","signature_b64":"j3jnRpcVKhU/dYbD9MBsOaxBohbZROjK1P++Yq0qUj0LMC7hOx4CRgeBBDU2ZJOJ6YVjvLxX5gz+hgddbRe1BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"831a8e2e334afcb739132015d70976fdadbe4be07b31c337a9fe286d353cebfd","last_reissued_at":"2026-05-27T01:04:51.055642Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:04:51.055642Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.21552","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gGkz892zOJl8cq0iTIo8N3lmReT1QIE0vuFhuJSSMbrPbcG7PL2c7FCPd7mRS/M8SxnmuvuEP6XViZhAuAouAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:49:16.325022Z"},"content_sha256":"04fd11e09d3ef0492d8749097bfb2c940a58a9d866f6ac649f0a19de0459197f","schema_version":"1.0","event_id":"sha256:04fd11e09d3ef0492d8749097bfb2c940a58a9d866f6ac649f0a19de0459197f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:QMNI4LRTJL6LOOITEAK5OCLW7W","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning GUI Grounding with Spatial Reasoning from Visual Feedback","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Chaoyun Zhang, Fangkai Yang, Huseyin Atahan Inan, Lukas Wutschitz, Lu Wang, Pasquale Minervini, Robert Sim, Samuel Kessler, Saravan Rajmohan, Wei-Ning Chen, Yu Zhao","submitted_at":"2025-09-25T20:38:01Z","abstract_excerpt":"Graphical User Interface (GUI) grounding is commonly framed as a coordinate prediction task -- given a natural language instruction, generate on-screen coordinates for actions such as clicks and keystrokes. However, recent Vision Language Models (VLMs) often fail to predict accurate numeric coordinates when processing GUI images with high resolutions and complex layouts. To address this issue, we reframe GUI grounding as an interactive search task, where the VLM generates actions to move a cursor in the GUI to locate UI elements. At each step, the model determines the target object, evaluates "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.21552","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.21552/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SwTlbylgcWA8K1h7ki8liMaskBIKIC0Aqx7tNiKvI6j5pdRHD6dXZXpoUUEaotu+ay0YiL2ba8a1iYsxQ7hYBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:49:16.325453Z"},"content_sha256":"173b8d0a5a46206b35e7d82215abf720ce7260c5115d6f4a9145d87296240a43","schema_version":"1.0","event_id":"sha256:173b8d0a5a46206b35e7d82215abf720ce7260c5115d6f4a9145d87296240a43"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/bundle.json","state_url":"https://pith.science/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T11:49:16Z","links":{"resolver":"https://pith.science/pith/QMNI4LRTJL6LOOITEAK5OCLW7W","bundle":"https://pith.science/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/bundle.json","state":"https://pith.science/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QMNI4LRTJL6LOOITEAK5OCLW7W/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:QMNI4LRTJL6LOOITEAK5OCLW7W","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9b35883c64595c95709f0fbf0a978a390a3049142145fad5b78f8f81c95e03fc","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-09-25T20:38:01Z","title_canon_sha256":"6e7428599cfad04afbd7a1d9e53e83b5b587b5ce7378657efe6a3b7dcab289f1"},"schema_version":"1.0","source":{"id":"2509.21552","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.21552","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2509.21552v2","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.21552","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"QMNI4LRTJL6L","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"QMNI4LRTJL6LOOIT","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"QMNI4LRT","created_at":"2026-05-27T01:04:51Z"}],"graph_snapshots":[{"event_id":"sha256:173b8d0a5a46206b35e7d82215abf720ce7260c5115d6f4a9145d87296240a43","target":"graph","created_at":"2026-05-27T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.21552/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Graphical User Interface (GUI) grounding is commonly framed as a coordinate prediction task -- given a natural language instruction, generate on-screen coordinates for actions such as clicks and keystrokes. However, recent Vision Language Models (VLMs) often fail to predict accurate numeric coordinates when processing GUI images with high resolutions and complex layouts. To address this issue, we reframe GUI grounding as an interactive search task, where the VLM generates actions to move a cursor in the GUI to locate UI elements. At each step, the model determines the target object, evaluates ","authors_text":"Chaoyun Zhang, Fangkai Yang, Huseyin Atahan Inan, Lukas Wutschitz, Lu Wang, Pasquale Minervini, Robert Sim, Samuel Kessler, Saravan Rajmohan, Wei-Ning Chen, Yu Zhao","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-09-25T20:38:01Z","title":"Learning GUI Grounding with Spatial Reasoning from Visual Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.21552","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:04fd11e09d3ef0492d8749097bfb2c940a58a9d866f6ac649f0a19de0459197f","target":"record","created_at":"2026-05-27T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9b35883c64595c95709f0fbf0a978a390a3049142145fad5b78f8f81c95e03fc","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-09-25T20:38:01Z","title_canon_sha256":"6e7428599cfad04afbd7a1d9e53e83b5b587b5ce7378657efe6a3b7dcab289f1"},"schema_version":"1.0","source":{"id":"2509.21552","kind":"arxiv","version":2}},"canonical_sha256":"831a8e2e334afcb739132015d70976fdadbe4be07b31c337a9fe286d353cebfd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"831a8e2e334afcb739132015d70976fdadbe4be07b31c337a9fe286d353cebfd","first_computed_at":"2026-05-27T01:04:51.055642Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:04:51.055642Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"j3jnRpcVKhU/dYbD9MBsOaxBohbZROjK1P++Yq0qUj0LMC7hOx4CRgeBBDU2ZJOJ6YVjvLxX5gz+hgddbRe1BQ==","signature_status":"signed_v1","signed_at":"2026-05-27T01:04:51.056402Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.21552","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:04fd11e09d3ef0492d8749097bfb2c940a58a9d866f6ac649f0a19de0459197f","sha256:173b8d0a5a46206b35e7d82215abf720ce7260c5115d6f4a9145d87296240a43"],"state_sha256":"039609b899c0f11241a07245786b638de5d4b8912b9726bef597c0ae660b7daa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Dx0JQk53zY6C8L3wH+t0bzbdxHQOdYFHh9a1Lw51EfqHOEh02XyNaR+/Ia3LI9oMp5gCByvwPQl5iY5NGD0GAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T11:49:16.327639Z","bundle_sha256":"d604a7acced8cb9261c67e1e01723d21cf05bced6aaf329095c5c3164b43a073"}}