{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:5A3FYMUHDWAGOBJHP3QJFWQXW5","short_pith_number":"pith:5A3FYMUH","canonical_record":{"source":{"id":"1612.06195","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-12-19T14:24:23Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"fc95eb9ffc2e79da746d95c34a3549515fd59aeddfb091132c05e5b302264893","abstract_canon_sha256":"31b975303674bd315d91c36c70c618fdae5bd76df92e536aea5bd658d1ae3365"},"schema_version":"1.0"},"canonical_sha256":"e8365c32871d806705277ee092da17b764af243662d8036bc2cdb90d943ed13a","source":{"kind":"arxiv","id":"1612.06195","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1612.06195","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"arxiv_version","alias_value":"1612.06195v1","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.06195","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"pith_short_12","alias_value":"5A3FYMUHDWAG","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_16","alias_value":"5A3FYMUHDWAGOBJH","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_8","alias_value":"5A3FYMUH","created_at":"2026-05-18T12:29:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:5A3FYMUHDWAGOBJHP3QJFWQXW5","target":"record","payload":{"canonical_record":{"source":{"id":"1612.06195","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-12-19T14:24:23Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"fc95eb9ffc2e79da746d95c34a3549515fd59aeddfb091132c05e5b302264893","abstract_canon_sha256":"31b975303674bd315d91c36c70c618fdae5bd76df92e536aea5bd658d1ae3365"},"schema_version":"1.0"},"canonical_sha256":"e8365c32871d806705277ee092da17b764af243662d8036bc2cdb90d943ed13a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:54:42.915285Z","signature_b64":"nR2IMNsDCuBwS62g6IU5oyNARZm9Qvskr4jjWjwzNeK8/5wTk8mObjVn50Q+gLHg6IyqgMQJvRGf8PaABaeYAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8365c32871d806705277ee092da17b764af243662d8036bc2cdb90d943ed13a","last_reissued_at":"2026-05-18T00:54:42.914941Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:54:42.914941Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1612.06195","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:54:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3PBszNXRSJA25PGrR8wWDIXzIbRHi9YIEi33WjSXsMZq8hYvtsl5t0ylai2yLfT2Sb3QlmnfjpiBF4rrX+N+DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T08:30:15.907041Z"},"content_sha256":"0e326146910abff5d49e31838018c85806357dd4349348f0669a1767339c907b","schema_version":"1.0","event_id":"sha256:0e326146910abff5d49e31838018c85806357dd4349348f0669a1767339c907b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:5A3FYMUHDWAGOBJHP3QJFWQXW5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Scalable Document-based Architecture for Text Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.DB","authors_text":"Ciprian-Octavian Truic\\u{a}, J\\'er\\^ome Darmont (ERIC), Julien Velcin (ERIC)","submitted_at":"2016-12-19T14:24:23Z","abstract_excerpt":"Analyzing textual data is a very challenging task because of the huge volume of data generated daily. Fundamental issues in text analysis include the lack of structure in document datasets, the need for various preprocessing steps %(e.g., stem or lemma extraction, part-of-speech tagging, named entities recognition...), and performance and scaling issues. Existing text analysis architectures partly solve these issues, providing restrictive data schemas, addressing only one aspect of text preprocessing and focusing on one single task when dealing with performance optimization. %As a result, no d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.06195","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:54:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"813LS/VPfTOp5yyuRUVTZi3ZOaxZcbkIsrlu6k4Idm4FXOZdNRKxcYDITsdn/r5jdhlf6lMPcNMTPJbT1qdPCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T08:30:15.907398Z"},"content_sha256":"1691dcf3217a99760bf82c89ee9f78072450253a81d570b4b086b1cceb7f89bd","schema_version":"1.0","event_id":"sha256:1691dcf3217a99760bf82c89ee9f78072450253a81d570b4b086b1cceb7f89bd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/bundle.json","state_url":"https://pith.science/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T08:30:15Z","links":{"resolver":"https://pith.science/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5","bundle":"https://pith.science/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/bundle.json","state":"https://pith.science/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5A3FYMUHDWAGOBJHP3QJFWQXW5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:5A3FYMUHDWAGOBJHP3QJFWQXW5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"31b975303674bd315d91c36c70c618fdae5bd76df92e536aea5bd658d1ae3365","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-12-19T14:24:23Z","title_canon_sha256":"fc95eb9ffc2e79da746d95c34a3549515fd59aeddfb091132c05e5b302264893"},"schema_version":"1.0","source":{"id":"1612.06195","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1612.06195","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"arxiv_version","alias_value":"1612.06195v1","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.06195","created_at":"2026-05-18T00:54:42Z"},{"alias_kind":"pith_short_12","alias_value":"5A3FYMUHDWAG","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_16","alias_value":"5A3FYMUHDWAGOBJH","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_8","alias_value":"5A3FYMUH","created_at":"2026-05-18T12:29:58Z"}],"graph_snapshots":[{"event_id":"sha256:1691dcf3217a99760bf82c89ee9f78072450253a81d570b4b086b1cceb7f89bd","target":"graph","created_at":"2026-05-18T00:54:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Analyzing textual data is a very challenging task because of the huge volume of data generated daily. Fundamental issues in text analysis include the lack of structure in document datasets, the need for various preprocessing steps %(e.g., stem or lemma extraction, part-of-speech tagging, named entities recognition...), and performance and scaling issues. Existing text analysis architectures partly solve these issues, providing restrictive data schemas, addressing only one aspect of text preprocessing and focusing on one single task when dealing with performance optimization. %As a result, no d","authors_text":"Ciprian-Octavian Truic\\u{a}, J\\'er\\^ome Darmont (ERIC), Julien Velcin (ERIC)","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-12-19T14:24:23Z","title":"A Scalable Document-based Architecture for Text Analysis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.06195","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0e326146910abff5d49e31838018c85806357dd4349348f0669a1767339c907b","target":"record","created_at":"2026-05-18T00:54:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"31b975303674bd315d91c36c70c618fdae5bd76df92e536aea5bd658d1ae3365","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-12-19T14:24:23Z","title_canon_sha256":"fc95eb9ffc2e79da746d95c34a3549515fd59aeddfb091132c05e5b302264893"},"schema_version":"1.0","source":{"id":"1612.06195","kind":"arxiv","version":1}},"canonical_sha256":"e8365c32871d806705277ee092da17b764af243662d8036bc2cdb90d943ed13a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e8365c32871d806705277ee092da17b764af243662d8036bc2cdb90d943ed13a","first_computed_at":"2026-05-18T00:54:42.914941Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:54:42.914941Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"nR2IMNsDCuBwS62g6IU5oyNARZm9Qvskr4jjWjwzNeK8/5wTk8mObjVn50Q+gLHg6IyqgMQJvRGf8PaABaeYAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:54:42.915285Z","signed_message":"canonical_sha256_bytes"},"source_id":"1612.06195","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0e326146910abff5d49e31838018c85806357dd4349348f0669a1767339c907b","sha256:1691dcf3217a99760bf82c89ee9f78072450253a81d570b4b086b1cceb7f89bd"],"state_sha256":"25933d0848ebd0a10efb4ee9fb9c4b4d592c48462295c57f42cd928ceb320c7b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7N7htXp0N26hKyFKb/qbpYh8EMQNSP95s6FWPqVXNyi5ozNN/xs2JXMNO/Vn7sm8kqH2oR9BbpZC61XWVX+hAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T08:30:15.909553Z","bundle_sha256":"c9e17f0c40a5c9905b02faa7303f9313d8241a5dec035b415f7e14d61d0e8d9f"}}