{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:SGK536E6KCACMPJDU3T2K5CQFI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7908b9fe0649fa224dd11dbd3eb447458eb791f01da9b82e375c15fe7965b83e","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-05-24T08:53:05Z","title_canon_sha256":"d303844761d84f8ce23cc7700c82cae9cc4afbe878d366435a9cb9089745d474"},"schema_version":"1.0","source":{"id":"2505.18603","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.18603","created_at":"2026-05-27T02:05:03Z"},{"alias_kind":"arxiv_version","alias_value":"2505.18603v2","created_at":"2026-05-27T02:05:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.18603","created_at":"2026-05-27T02:05:03Z"},{"alias_kind":"pith_short_12","alias_value":"SGK536E6KCAC","created_at":"2026-05-27T02:05:03Z"},{"alias_kind":"pith_short_16","alias_value":"SGK536E6KCACMPJD","created_at":"2026-05-27T02:05:03Z"},{"alias_kind":"pith_short_8","alias_value":"SGK536E6","created_at":"2026-05-27T02:05:03Z"}],"graph_snapshots":[{"event_id":"sha256:747765f373e890dd027063e4fca30bb0f6f8f9cd7938e4b66ca90806dad33e38","target":"graph","created_at":"2026-05-27T02:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2505.18603/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Document understanding aims to perform question answering and information extraction over document images, where the visual content is highly information-dense and most queries rely on only a few relevant layout regions. However, existing methods either adopt a one-pass strategy that implicitly assumes all layouts are equally important, or focus excessively on small regions at the cost of losing critical layout information. To address these limitations, we introduce Doc-CoB (Chain-of-Boxes), a simple-yet-effective framework that integrates coarse-to-fine layout-aware visual reasoning into mult","authors_text":"Bo Zhang, Gang Huang, Hangdi Xing, Huan Zhou, Jiajun Bu, Kai Ye, Kehan Chen, Sheng Zhou, Xianwei Mao, Ye Mo, Zirui Shao, Zixu Yan","cross_cats":["cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-05-24T08:53:05Z","title":"Doc-CoB: Enhancing Document Understanding with Visual Chain-of-Boxes Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.18603","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c95ebbd1540b06962fee23dc3bf33a5527c09c7e757bb996c273e19ed3bb85ae","target":"record","created_at":"2026-05-27T02:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7908b9fe0649fa224dd11dbd3eb447458eb791f01da9b82e375c15fe7965b83e","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-05-24T08:53:05Z","title_canon_sha256":"d303844761d84f8ce23cc7700c82cae9cc4afbe878d366435a9cb9089745d474"},"schema_version":"1.0","source":{"id":"2505.18603","kind":"arxiv","version":2}},"canonical_sha256":"9195ddf89e5080263d23a6e7a574502a11b49b5c68f37b915fa4f4368720acc3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9195ddf89e5080263d23a6e7a574502a11b49b5c68f37b915fa4f4368720acc3","first_computed_at":"2026-05-27T02:05:03.386365Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T02:05:03.386365Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KTQIoNyUDq4tO0hzWlr1TsPZ8omj2puPKzf/hzDwDpl5SRljvTbhrtLp0vysoAloNbLyRJrT0UwG0zrYZe7jCA==","signature_status":"signed_v1","signed_at":"2026-05-27T02:05:03.387205Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.18603","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c95ebbd1540b06962fee23dc3bf33a5527c09c7e757bb996c273e19ed3bb85ae","sha256:747765f373e890dd027063e4fca30bb0f6f8f9cd7938e4b66ca90806dad33e38"],"state_sha256":"b47d13cabcc0ee81a69c73eed97b7f2c88941e64244fdea09a54d6652927eded"}