{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:N6RK7NHVJQFUPPBD5UH4JPUXGC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7"},"schema_version":"1.0","source":{"id":"2501.06659","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"arxiv_version","alias_value":"2501.06659v2","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.06659","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_12","alias_value":"N6RK7NHVJQFU","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_16","alias_value":"N6RK7NHVJQFUPPBD","created_at":"2026-06-09T02:07:01Z"},{"alias_kind":"pith_short_8","alias_value":"N6RK7NHV","created_at":"2026-06-09T02:07:01Z"}],"graph_snapshots":[{"event_id":"sha256:de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe","target":"graph","created_at":"2026-06-09T02:07:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2501.06659/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Many templatized documents are programmatically generated from structured data following a visual template. Such documents include invoices, tax documents, financial reports, and purchase orders. Effective data extraction from these documents is crucial to support downstream analytical tasks. Current data extraction tools often struggle with complex document layouts, incur high latency and/or cost on large datasets, and require significant human effort. The key insight of our tool, TWIX, is to infer the underlying template used to create such documents, and then extract the data, rather than e","authors_text":"Aditya G. Parameswaran, Alvin Cheung, Mawil Hasan, Rohan Kosalge, Yiming Lin","cross_cats":["cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title":"Visual Template Inference for Data Extraction from Documents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.06659","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15","target":"record","created_at":"2026-06-09T02:07:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5904112b23a1ed31ff4e1f0f4507073fe2e9deb1ee5fcc0a57c19e2a0700a210","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.DB","submitted_at":"2025-01-11T23:07:04Z","title_canon_sha256":"69979ec0b0c135e5ff539259a0e0b7fbb2d4a0ba23db71a388518571b4b455c7"},"schema_version":"1.0","source":{"id":"2501.06659","kind":"arxiv","version":2}},"canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6fa2afb4f54c0b47bc23ed0fc4be9730809cf2a434a5e49fae721a5a926d01eb","first_computed_at":"2026-06-09T02:07:01.709457Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:01.709457Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VRKo6tmzzsl8ZdRstU8ucu5n1wEdNSTq6Y9L/iJMFTQMgx7yU574CfY0owM1SAWY2TANUINXnoqJi8agCacYCQ==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:01.710502Z","signed_message":"canonical_sha256_bytes"},"source_id":"2501.06659","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6ae4bec224282751b59f716e4984d3d02c07ce8ab78096530a7f5a5a8ab6ab15","sha256:de5ca273271d5817db35f0d973c42bfa79f19ab88ba6fb49b00dc34e428d49fe"],"state_sha256":"a831cf16922df115ac1ec289e24c1326784ee12c2f335426d8aedafdd7d19ffd"}