{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DIXOGX6KGSGTFLGOCG7URS2R66","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b37a05dcbdfddcf462018fb0f89bbe8c07afc6882758f842f9bd5227efed0b80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-03-23T09:38:15Z","title_canon_sha256":"6b38a8544abc93e578e56d5bc68433ea4d6ec6aa6f93547c7ff434ff0b352d6d"},"schema_version":"1.0","source":{"id":"2603.21746","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21746","created_at":"2026-05-29T02:05:43Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21746v2","created_at":"2026-05-29T02:05:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21746","created_at":"2026-05-29T02:05:43Z"},{"alias_kind":"pith_short_12","alias_value":"DIXOGX6KGSGT","created_at":"2026-05-29T02:05:43Z"},{"alias_kind":"pith_short_16","alias_value":"DIXOGX6KGSGTFLGO","created_at":"2026-05-29T02:05:43Z"},{"alias_kind":"pith_short_8","alias_value":"DIXOGX6K","created_at":"2026-05-29T02:05:43Z"}],"graph_snapshots":[{"event_id":"sha256:94c013ef84e46f3c180a85dc59b95beb30458c8398db34a5d426c72effa4b147","target":"graph","created_at":"2026-05-29T02:05:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.21746/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pointing-based methods decompose complex tasks as sequential grounding and reasoning steps. Given a query, the model first grounds the relevant objects by generating their coordinates, and then predicts an answer conditioned on these points. While this approach has been shown to increase the performance of Large Vision-Language Models (LVLMs), it remains unclear why and how it improves the models' visual reasoning. In this work, we evaluate pointing-based methods in the task of zero-shot counting in visual scenes. We experiment with multiple fine-tuning and training-free approaches on state-of","authors_text":"Giuseppe Riccardi, Massimo Rizzoli, Seyed Mahed Mousavi, Simone Alghisi","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-03-23T09:38:15Z","title":"Getting to the Point: Pointing Improves LVLMs at Counting"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.21746","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ed4c02c3c5212684acdcffb457bee23647f8f282c68beb8f6a8c85b276b81928","target":"record","created_at":"2026-05-29T02:05:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b37a05dcbdfddcf462018fb0f89bbe8c07afc6882758f842f9bd5227efed0b80","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-03-23T09:38:15Z","title_canon_sha256":"6b38a8544abc93e578e56d5bc68433ea4d6ec6aa6f93547c7ff434ff0b352d6d"},"schema_version":"1.0","source":{"id":"2603.21746","kind":"arxiv","version":2}},"canonical_sha256":"1a2ee35fca348d32acce11bf48cb51f79bfecd9fb44af4628438615083baa971","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1a2ee35fca348d32acce11bf48cb51f79bfecd9fb44af4628438615083baa971","first_computed_at":"2026-05-29T02:05:43.367434Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T02:05:43.367434Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"v6Wdb/9dCbE6NPcxmN82TGaJyePdRNY7qc5IpLJRJeqDnu7Pw5BNmBi1wNEYqTSbAi5FA4PVA5aBRRCQUmBGCA==","signature_status":"signed_v1","signed_at":"2026-05-29T02:05:43.368476Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.21746","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ed4c02c3c5212684acdcffb457bee23647f8f282c68beb8f6a8c85b276b81928","sha256:94c013ef84e46f3c180a85dc59b95beb30458c8398db34a5d426c72effa4b147"],"state_sha256":"ad2d6b7127c8c4a87a8bb2c1d85f06051e206c2c04bfda33a049d1cf08150438"}