{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LVNLMQBVHIBQ73A66BMU7Y5HS7","short_pith_number":"pith:LVNLMQBV","schema_version":"1.0","canonical_sha256":"5d5ab640353a030fec1ef0594fe3a797fc2196e571d31fad04ac4ae961036a80","source":{"kind":"arxiv","id":"2605.19866","version":1},"attestation_state":"computed","paper":{"title":"Structured Layout Priors for Robust Out-of-Distribution Visual Document Understanding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ahmed Nassar, A. Said Gurbuz, Christoph Auer, Peter El Hachem, Peter W. J. Staar","submitted_at":"2026-05-19T13:58:24Z","abstract_excerpt":"Vision-Language Models (VLMs) parse documents end-to-end but frequently break down on layouts unlike those seen in training. We attribute this to a two-hop bottleneck: before the decoder can extract content (Hop 2), it must first classify and localize the enclosing layout entity (Hop 1), and when the first hop fails the second collapses into omissions, malformed structure, or autoregressive repetition. We pre-resolve Hop 1 outside the decoder by running a lightweight RT-DETR detector, serializing its outputs in the parser's native DocTags vocabulary, and injecting them into the prompt alongsid"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19866","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T13:58:24Z","cross_cats_sorted":[],"title_canon_sha256":"655c38b135c9a31ac62021d062ad3828c6209e008bd8bae654511cd433dd83ba","abstract_canon_sha256":"51b7a004f1fcb5c34733214430f93974e870716bf79eea702e507dcdc4a0d78c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:06:18.252015Z","signature_b64":"b7HfkQpizRSpF+Imr1NyP/4YFz4G0+rx+Zp0+DgvPq1OMH5ZBtOguYTz9xVI/ytyRsC9oJBhScwHM3h9h2BbDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5d5ab640353a030fec1ef0594fe3a797fc2196e571d31fad04ac4ae961036a80","last_reissued_at":"2026-05-20T01:06:18.251428Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:06:18.251428Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Structured Layout Priors for Robust Out-of-Distribution Visual Document Understanding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ahmed Nassar, A. Said Gurbuz, Christoph Auer, Peter El Hachem, Peter W. J. Staar","submitted_at":"2026-05-19T13:58:24Z","abstract_excerpt":"Vision-Language Models (VLMs) parse documents end-to-end but frequently break down on layouts unlike those seen in training. We attribute this to a two-hop bottleneck: before the decoder can extract content (Hop 2), it must first classify and localize the enclosing layout entity (Hop 1), and when the first hop fails the second collapses into omissions, malformed structure, or autoregressive repetition. We pre-resolve Hop 1 outside the decoder by running a lightweight RT-DETR detector, serializing its outputs in the parser's native DocTags vocabulary, and injecting them into the prompt alongsid"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19866","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19866/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19866","created_at":"2026-05-20T01:06:18.251530+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19866v1","created_at":"2026-05-20T01:06:18.251530+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19866","created_at":"2026-05-20T01:06:18.251530+00:00"},{"alias_kind":"pith_short_12","alias_value":"LVNLMQBVHIBQ","created_at":"2026-05-20T01:06:18.251530+00:00"},{"alias_kind":"pith_short_16","alias_value":"LVNLMQBVHIBQ73A6","created_at":"2026-05-20T01:06:18.251530+00:00"},{"alias_kind":"pith_short_8","alias_value":"LVNLMQBV","created_at":"2026-05-20T01:06:18.251530+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7","json":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7.json","graph_json":"https://pith.science/api/pith-number/LVNLMQBVHIBQ73A66BMU7Y5HS7/graph.json","events_json":"https://pith.science/api/pith-number/LVNLMQBVHIBQ73A66BMU7Y5HS7/events.json","paper":"https://pith.science/paper/LVNLMQBV"},"agent_actions":{"view_html":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7","download_json":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7.json","view_paper":"https://pith.science/paper/LVNLMQBV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19866&json=true","fetch_graph":"https://pith.science/api/pith-number/LVNLMQBVHIBQ73A66BMU7Y5HS7/graph.json","fetch_events":"https://pith.science/api/pith-number/LVNLMQBVHIBQ73A66BMU7Y5HS7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7/action/storage_attestation","attest_author":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7/action/author_attestation","sign_citation":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7/action/citation_signature","submit_replication":"https://pith.science/pith/LVNLMQBVHIBQ73A66BMU7Y5HS7/action/replication_record"}},"created_at":"2026-05-20T01:06:18.251530+00:00","updated_at":"2026-05-20T01:06:18.251530+00:00"}