{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:EUGZQ2GAP3Y3J6LU75KXY6AZO3","short_pith_number":"pith:EUGZQ2GA","canonical_record":{"source":{"id":"2603.18652","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-03-19T09:17:21Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"97ab1849e0173f8236e0a5182e708e456b4f5250ad019259cd3ba0f13e9e44a3","abstract_canon_sha256":"7d0c4ab16ef3f69a6da3bbc423ef3c781d612231954874f2c9cd8edd72dc49d6"},"schema_version":"1.0"},"canonical_sha256":"250d9868c07ef1b4f974ff557c781976f4fe8ea0885753bc05928fe048affd3f","source":{"kind":"arxiv","id":"2603.18652","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.18652","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2603.18652v2","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.18652","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"EUGZQ2GAP3Y3","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"EUGZQ2GAP3Y3J6LU","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"EUGZQ2GA","created_at":"2026-06-02T03:04:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:EUGZQ2GAP3Y3J6LU75KXY6AZO3","target":"record","payload":{"canonical_record":{"source":{"id":"2603.18652","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-03-19T09:17:21Z","cross_cats_sorted":["cs.AI","cs.IR"],"title_canon_sha256":"97ab1849e0173f8236e0a5182e708e456b4f5250ad019259cd3ba0f13e9e44a3","abstract_canon_sha256":"7d0c4ab16ef3f69a6da3bbc423ef3c781d612231954874f2c9cd8edd72dc49d6"},"schema_version":"1.0"},"canonical_sha256":"250d9868c07ef1b4f974ff557c781976f4fe8ea0885753bc05928fe048affd3f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T03:04:40.496391Z","signature_b64":"0UszHPtUhS1BMgoRrNvOTztdrre3cvHcgvXD+nu0oMS9hyE6sK/D97lW6Jwi7ZrxJR18obCqXJhgA8KGqC5nCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"250d9868c07ef1b4f974ff557c781976f4fe8ea0885753bc05928fe048affd3f","last_reissued_at":"2026-06-02T03:04:40.495944Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T03:04:40.495944Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.18652","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T03:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0Sly3KP3ojWSyJfPauRpYlhwMT9bptqCFQIYOCjYjKa4hPaTy/Xzv7AbyIbMa1oG3JY+/6ADf+SUZL57K/u9Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:21:48.325362Z"},"content_sha256":"a626be670911986579a5f5b6e3bd76c9fab7a9a6a7189c5bbea0288bfe2961c3","schema_version":"1.0","event_id":"sha256:a626be670911986579a5f5b6e3bd76c9fab7a9a6a7189c5bbea0288bfe2961c3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:EUGZQ2GAP3Y3J6LU75KXY6AZO3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond String Matching: Semantic Evaluation of PDF Table Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.IR"],"primary_cat":"cs.CV","authors_text":"Janis Keuper, Pius Horn","submitted_at":"2026-03-19T09:17:21Z","abstract_excerpt":"Reliably extracting tables from PDFs is essential for large-scale scientific data mining and knowledge base construction, yet existing evaluation approaches rely on rule-based metrics that fail to capture semantic equivalence of table content. We present a benchmarking framework based on synthetically generated PDFs with precise LaTeX ground truth, using tables sourced from arXiv to ensure realistic complexity and diversity. As our central methodological contribution, we apply LLM-as-a-judge for semantic table evaluation, integrated into a matching pipeline that accommodates inconsistencies in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.18652","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.18652/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T03:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cc/kVhSCJ494+WPZfxqrTQyiRJVE3b88gEyI34so6mUt1wmgsVDVfD29JHeooGCrrnZVppYGwA60LRL4nvXcCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:21:48.326061Z"},"content_sha256":"733f4306b1930c46fa7645c78ca4a8e887afe40b1861dd26d44dcfe836397921","schema_version":"1.0","event_id":"sha256:733f4306b1930c46fa7645c78ca4a8e887afe40b1861dd26d44dcfe836397921"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/bundle.json","state_url":"https://pith.science/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T08:21:48Z","links":{"resolver":"https://pith.science/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3","bundle":"https://pith.science/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/bundle.json","state":"https://pith.science/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EUGZQ2GAP3Y3J6LU75KXY6AZO3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EUGZQ2GAP3Y3J6LU75KXY6AZO3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7d0c4ab16ef3f69a6da3bbc423ef3c781d612231954874f2c9cd8edd72dc49d6","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-03-19T09:17:21Z","title_canon_sha256":"97ab1849e0173f8236e0a5182e708e456b4f5250ad019259cd3ba0f13e9e44a3"},"schema_version":"1.0","source":{"id":"2603.18652","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.18652","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2603.18652v2","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.18652","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"EUGZQ2GAP3Y3","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"EUGZQ2GAP3Y3J6LU","created_at":"2026-06-02T03:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"EUGZQ2GA","created_at":"2026-06-02T03:04:40Z"}],"graph_snapshots":[{"event_id":"sha256:733f4306b1930c46fa7645c78ca4a8e887afe40b1861dd26d44dcfe836397921","target":"graph","created_at":"2026-06-02T03:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.18652/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reliably extracting tables from PDFs is essential for large-scale scientific data mining and knowledge base construction, yet existing evaluation approaches rely on rule-based metrics that fail to capture semantic equivalence of table content. We present a benchmarking framework based on synthetically generated PDFs with precise LaTeX ground truth, using tables sourced from arXiv to ensure realistic complexity and diversity. As our central methodological contribution, we apply LLM-as-a-judge for semantic table evaluation, integrated into a matching pipeline that accommodates inconsistencies in","authors_text":"Janis Keuper, Pius Horn","cross_cats":["cs.AI","cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-03-19T09:17:21Z","title":"Beyond String Matching: Semantic Evaluation of PDF Table Extraction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.18652","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a626be670911986579a5f5b6e3bd76c9fab7a9a6a7189c5bbea0288bfe2961c3","target":"record","created_at":"2026-06-02T03:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7d0c4ab16ef3f69a6da3bbc423ef3c781d612231954874f2c9cd8edd72dc49d6","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-03-19T09:17:21Z","title_canon_sha256":"97ab1849e0173f8236e0a5182e708e456b4f5250ad019259cd3ba0f13e9e44a3"},"schema_version":"1.0","source":{"id":"2603.18652","kind":"arxiv","version":2}},"canonical_sha256":"250d9868c07ef1b4f974ff557c781976f4fe8ea0885753bc05928fe048affd3f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"250d9868c07ef1b4f974ff557c781976f4fe8ea0885753bc05928fe048affd3f","first_computed_at":"2026-06-02T03:04:40.495944Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T03:04:40.495944Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0UszHPtUhS1BMgoRrNvOTztdrre3cvHcgvXD+nu0oMS9hyE6sK/D97lW6Jwi7ZrxJR18obCqXJhgA8KGqC5nCQ==","signature_status":"signed_v1","signed_at":"2026-06-02T03:04:40.496391Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.18652","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a626be670911986579a5f5b6e3bd76c9fab7a9a6a7189c5bbea0288bfe2961c3","sha256:733f4306b1930c46fa7645c78ca4a8e887afe40b1861dd26d44dcfe836397921"],"state_sha256":"f7ac29d95e5979a6becd3f2e9ab937d431844c6c913b4e81bccab9d39a3dc19d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JKEn1zZ/67CcM6qaGyJmFjUz3G/x4T5UiYAhuTI4THBUAp098JWvj5IrUbbz2HSDKQAYzAvY2MtJEz1TJKkmBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T08:21:48.330681Z","bundle_sha256":"cd5b32a75249e32183180607695daf71610040bf3355e04fb608f84d5595c4db"}}