{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:B3OJKST6RA2YRFBH67F33JDQGY","short_pith_number":"pith:B3OJKST6","canonical_record":{"source":{"id":"1805.09687","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2018-05-15T07:05:52Z","cross_cats_sorted":["cs.CL","cs.CV","cs.DC","cs.IR"],"title_canon_sha256":"483fb02ba2b7ab5d98240eacb09b61781e47f907dbcff1a1f095d33be4f46cd9","abstract_canon_sha256":"9faec86f6a5539f628166c421fbd441d3e3b0450af14ef1f778b05b10f68f5e5"},"schema_version":"1.0"},"canonical_sha256":"0edc954a7e8835889427f7cbbda4703605ada3289653528a3702e4563a8c6dec","source":{"kind":"arxiv","id":"1805.09687","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.09687","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"arxiv_version","alias_value":"1805.09687v1","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.09687","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"pith_short_12","alias_value":"B3OJKST6RA2Y","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_16","alias_value":"B3OJKST6RA2YRFBH","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_8","alias_value":"B3OJKST6","created_at":"2026-05-18T12:32:13Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:B3OJKST6RA2YRFBH67F33JDQGY","target":"record","payload":{"canonical_record":{"source":{"id":"1805.09687","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2018-05-15T07:05:52Z","cross_cats_sorted":["cs.CL","cs.CV","cs.DC","cs.IR"],"title_canon_sha256":"483fb02ba2b7ab5d98240eacb09b61781e47f907dbcff1a1f095d33be4f46cd9","abstract_canon_sha256":"9faec86f6a5539f628166c421fbd441d3e3b0450af14ef1f778b05b10f68f5e5"},"schema_version":"1.0"},"canonical_sha256":"0edc954a7e8835889427f7cbbda4703605ada3289653528a3702e4563a8c6dec","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:15:03.136183Z","signature_b64":"vgq3kbVouVLtCPQJrxnnhnZkhvy79rwCRmwdXyLubxj2TeCNsDGIm4EcLA79xHcCRBmtmBM+QzwHqTp2Y+LKDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0edc954a7e8835889427f7cbbda4703605ada3289653528a3702e4563a8c6dec","last_reissued_at":"2026-05-18T00:15:03.135610Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:15:03.135610Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.09687","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mvEbOKxCjd/WjoDis9ukH6rgWC/XliHtz0vnXyokp8hWLHHzrHT7PXc5ZybHNSUh2KSOQUfqACvB/pfEONNnDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:26:23.330116Z"},"content_sha256":"cc3125988e208b233940a7033bdba82e65b7b4f237cfd828d6d6f3901dd6232c","schema_version":"1.0","event_id":"sha256:cc3125988e208b233940a7033bdba82e65b7b4f237cfd828d6d6f3901dd6232c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:B3OJKST6RA2YRFBH67F33JDQGY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Corpus Conversion Service: A machine learning platform to ingest documents at scale [Poster abstract]","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.CV","cs.DC","cs.IR"],"primary_cat":"cs.DL","authors_text":"Christoph Auer, Costas Bekas, Michele Dolfi, Peter W J Staar","submitted_at":"2018-05-15T07:05:52Z","abstract_excerpt":"Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make their content discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. We present a platform to ingest documents at scale which is powered by Machine Learning techniques and allows the user to "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.09687","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VekrfVlz72zYSR8U3ygQ6PON6sOIa98dIPIsqJCMYUbVzqGzzXKXT5Loqh3OTLlFmdo0RyTezS0ZQePOd6SgBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T17:26:23.330791Z"},"content_sha256":"491fb197990c603cd2b280f302a8a2931e663b11326f7357de253c4e1a868a74","schema_version":"1.0","event_id":"sha256:491fb197990c603cd2b280f302a8a2931e663b11326f7357de253c4e1a868a74"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/B3OJKST6RA2YRFBH67F33JDQGY/bundle.json","state_url":"https://pith.science/pith/B3OJKST6RA2YRFBH67F33JDQGY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/B3OJKST6RA2YRFBH67F33JDQGY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T17:26:23Z","links":{"resolver":"https://pith.science/pith/B3OJKST6RA2YRFBH67F33JDQGY","bundle":"https://pith.science/pith/B3OJKST6RA2YRFBH67F33JDQGY/bundle.json","state":"https://pith.science/pith/B3OJKST6RA2YRFBH67F33JDQGY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/B3OJKST6RA2YRFBH67F33JDQGY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:B3OJKST6RA2YRFBH67F33JDQGY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9faec86f6a5539f628166c421fbd441d3e3b0450af14ef1f778b05b10f68f5e5","cross_cats_sorted":["cs.CL","cs.CV","cs.DC","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2018-05-15T07:05:52Z","title_canon_sha256":"483fb02ba2b7ab5d98240eacb09b61781e47f907dbcff1a1f095d33be4f46cd9"},"schema_version":"1.0","source":{"id":"1805.09687","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.09687","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"arxiv_version","alias_value":"1805.09687v1","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.09687","created_at":"2026-05-18T00:15:03Z"},{"alias_kind":"pith_short_12","alias_value":"B3OJKST6RA2Y","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_16","alias_value":"B3OJKST6RA2YRFBH","created_at":"2026-05-18T12:32:13Z"},{"alias_kind":"pith_short_8","alias_value":"B3OJKST6","created_at":"2026-05-18T12:32:13Z"}],"graph_snapshots":[{"event_id":"sha256:491fb197990c603cd2b280f302a8a2931e663b11326f7357de253c4e1a868a74","target":"graph","created_at":"2026-05-18T00:15:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make their content discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. We present a platform to ingest documents at scale which is powered by Machine Learning techniques and allows the user to ","authors_text":"Christoph Auer, Costas Bekas, Michele Dolfi, Peter W J Staar","cross_cats":["cs.CL","cs.CV","cs.DC","cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2018-05-15T07:05:52Z","title":"Corpus Conversion Service: A machine learning platform to ingest documents at scale [Poster abstract]"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.09687","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cc3125988e208b233940a7033bdba82e65b7b4f237cfd828d6d6f3901dd6232c","target":"record","created_at":"2026-05-18T00:15:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9faec86f6a5539f628166c421fbd441d3e3b0450af14ef1f778b05b10f68f5e5","cross_cats_sorted":["cs.CL","cs.CV","cs.DC","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2018-05-15T07:05:52Z","title_canon_sha256":"483fb02ba2b7ab5d98240eacb09b61781e47f907dbcff1a1f095d33be4f46cd9"},"schema_version":"1.0","source":{"id":"1805.09687","kind":"arxiv","version":1}},"canonical_sha256":"0edc954a7e8835889427f7cbbda4703605ada3289653528a3702e4563a8c6dec","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0edc954a7e8835889427f7cbbda4703605ada3289653528a3702e4563a8c6dec","first_computed_at":"2026-05-18T00:15:03.135610Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:15:03.135610Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"vgq3kbVouVLtCPQJrxnnhnZkhvy79rwCRmwdXyLubxj2TeCNsDGIm4EcLA79xHcCRBmtmBM+QzwHqTp2Y+LKDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:15:03.136183Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.09687","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cc3125988e208b233940a7033bdba82e65b7b4f237cfd828d6d6f3901dd6232c","sha256:491fb197990c603cd2b280f302a8a2931e663b11326f7357de253c4e1a868a74"],"state_sha256":"51d57d12d2be674f6053ee6ed34a5a302c8667bdd6817dee0c1e72b51db7ee0a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VI7Div5aX78ScJhf2Fcxk5Ow9iOuaOp7LKQORkQPEQhedITE+G0ePJjG8UbB9s3KnGyz81zR5Vt+1xhcxDGrBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T17:26:23.334037Z","bundle_sha256":"8e5e7b4af35d89017f2ac98e5a8b07093688b7e812daa21690824fa1bfdcabef"}}