{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:7NH5IDEJSRISLUBOQRDXR676RM","short_pith_number":"pith:7NH5IDEJ","schema_version":"1.0","canonical_sha256":"fb4fd40c89945125d02e844778fbfe8b35c5022230b0a2bdb308c1d5ec4c2eae","source":{"kind":"arxiv","id":"2606.17355","version":1},"attestation_state":"computed","paper":{"title":"Complex Layout Classification in the Wild: A Low-Resource Approach with Layout-Preserving Augmentations","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Berat Kurar-Barakat, Daria Vasyutinsky-Shapira, Gal Grudka, Iddo Hakim, Mohammad Suliman, Nachum Dershowitz, Omer Ventura, Sharva Gogawale","submitted_at":"2026-06-15T23:06:09Z","abstract_excerpt":"Many digitized corpora suffer from low resources because annotations may be scarce, page scans are noisy and of poor resolution, or layouts are structurally complex in ways that negatively affect the quality of automatic transcription. Developing robust classification models for low-resource languages is inhibited by the lack of large-scale annotated data and by the frequent semantic complexity of page layouts. To this end, we have curated a complex-layout dataset, manually classified into eight distinct layout types based on their separator regions. To overcome data scarcity, we propose a nov"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.17355","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-15T23:06:09Z","cross_cats_sorted":[],"title_canon_sha256":"eaac407eb353c83f96522443d62b38f24d211f8ffeddc9e5df833f400562746f","abstract_canon_sha256":"8a8382e2312d60ee7e76e51a503ee5438b40431d2a4686700f6dfa451fe7bd61"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:10.001897Z","signature_b64":"PdqGl5FboQ3//kmwnkfcheRjl3RxdNPLiB4x+qBnDIOIcqZG/fsrMckvP5O8Ex5mT5mQsdNPVdTbv16CkHrVAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fb4fd40c89945125d02e844778fbfe8b35c5022230b0a2bdb308c1d5ec4c2eae","last_reissued_at":"2026-06-19T16:10:10.001516Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:10.001516Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Complex Layout Classification in the Wild: A Low-Resource Approach with Layout-Preserving Augmentations","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Berat Kurar-Barakat, Daria Vasyutinsky-Shapira, Gal Grudka, Iddo Hakim, Mohammad Suliman, Nachum Dershowitz, Omer Ventura, Sharva Gogawale","submitted_at":"2026-06-15T23:06:09Z","abstract_excerpt":"Many digitized corpora suffer from low resources because annotations may be scarce, page scans are noisy and of poor resolution, or layouts are structurally complex in ways that negatively affect the quality of automatic transcription. Developing robust classification models for low-resource languages is inhibited by the lack of large-scale annotated data and by the frequent semantic complexity of page layouts. To this end, we have curated a complex-layout dataset, manually classified into eight distinct layout types based on their separator regions. To overcome data scarcity, we propose a nov"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17355","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17355/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.17355","created_at":"2026-06-19T16:10:10.001571+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.17355v1","created_at":"2026-06-19T16:10:10.001571+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17355","created_at":"2026-06-19T16:10:10.001571+00:00"},{"alias_kind":"pith_short_12","alias_value":"7NH5IDEJSRIS","created_at":"2026-06-19T16:10:10.001571+00:00"},{"alias_kind":"pith_short_16","alias_value":"7NH5IDEJSRISLUBO","created_at":"2026-06-19T16:10:10.001571+00:00"},{"alias_kind":"pith_short_8","alias_value":"7NH5IDEJ","created_at":"2026-06-19T16:10:10.001571+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM","json":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM.json","graph_json":"https://pith.science/api/pith-number/7NH5IDEJSRISLUBOQRDXR676RM/graph.json","events_json":"https://pith.science/api/pith-number/7NH5IDEJSRISLUBOQRDXR676RM/events.json","paper":"https://pith.science/paper/7NH5IDEJ"},"agent_actions":{"view_html":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM","download_json":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM.json","view_paper":"https://pith.science/paper/7NH5IDEJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.17355&json=true","fetch_graph":"https://pith.science/api/pith-number/7NH5IDEJSRISLUBOQRDXR676RM/graph.json","fetch_events":"https://pith.science/api/pith-number/7NH5IDEJSRISLUBOQRDXR676RM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM/action/storage_attestation","attest_author":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM/action/author_attestation","sign_citation":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM/action/citation_signature","submit_replication":"https://pith.science/pith/7NH5IDEJSRISLUBOQRDXR676RM/action/replication_record"}},"created_at":"2026-06-19T16:10:10.001571+00:00","updated_at":"2026-06-19T16:10:10.001571+00:00"}