{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:LHRAR6AG5HZIUORJ5OH6OBVLXT","short_pith_number":"pith:LHRAR6AG","canonical_record":{"source":{"id":"1807.01566","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-07-04T13:23:09Z","cross_cats_sorted":[],"title_canon_sha256":"90d0d92fc01ce56e37fca0252c2345e2fd1dc2496c2a21c69756809590b7dec1","abstract_canon_sha256":"5024b308b116b17f176cf44fac7611d5a518230d463702d777bf2e847ce7d04a"},"schema_version":"1.0"},"canonical_sha256":"59e208f806e9f28a3a29eb8fe706abbce11fde3fb6e90b3c8822d64640691f1d","source":{"kind":"arxiv","id":"1807.01566","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.01566","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"arxiv_version","alias_value":"1807.01566v1","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.01566","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"pith_short_12","alias_value":"LHRAR6AG5HZI","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LHRAR6AG5HZIUORJ","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LHRAR6AG","created_at":"2026-05-18T12:32:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:LHRAR6AG5HZIUORJ5OH6OBVLXT","target":"record","payload":{"canonical_record":{"source":{"id":"1807.01566","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-07-04T13:23:09Z","cross_cats_sorted":[],"title_canon_sha256":"90d0d92fc01ce56e37fca0252c2345e2fd1dc2496c2a21c69756809590b7dec1","abstract_canon_sha256":"5024b308b116b17f176cf44fac7611d5a518230d463702d777bf2e847ce7d04a"},"schema_version":"1.0"},"canonical_sha256":"59e208f806e9f28a3a29eb8fe706abbce11fde3fb6e90b3c8822d64640691f1d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:11:29.469230Z","signature_b64":"j4ov5HFpweGpN0J9eo+aCPsoBNHXlsf31M66K42E43NytOY+DHHJzY4rrZ+0Is89KUVgds6wVnnVd1GwPxcpDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"59e208f806e9f28a3a29eb8fe706abbce11fde3fb6e90b3c8822d64640691f1d","last_reissued_at":"2026-05-18T00:11:29.468638Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:11:29.468638Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1807.01566","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NCB/X7leN9Y/j1bLBc5gnfGbZaCIZzcgDsCjXSZhz9MnwCzv6O9IwHlKnhn7H3TAOsDHNv2CiF/ELkfRelQxAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T20:39:45.880614Z"},"content_sha256":"aee3eb187b0919e4abdffd9e7230e33005cfb5a36e79ddf10b741a44f2f681da","schema_version":"1.0","event_id":"sha256:aee3eb187b0919e4abdffd9e7230e33005cfb5a36e79ddf10b741a44f2f681da"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:LHRAR6AG5HZIUORJ5OH6OBVLXT","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Analyzing Big Datasets of Genomic Sequences: Fast and Scalable Collection of k-mer Statistics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Giuseppe Cattaneo, Mara Sorella, Raffaele Giancarlo, Simona Rombo, Umberto Ferraro Petrillo","submitted_at":"2018-07-04T13:23:09Z","abstract_excerpt":"Distributed approaches based on the map-reduce programming paradigm have started to be proposed in the bioinformatics domain, due to the large amount of data produced by the next-generation sequencing techniques. However, the use of map-reduce and related Big Data technologies and frameworks (e.g., Apache Hadoop and Spark) does not necessarily produce satisfactory results, in terms of both efficiency and effectiveness. We discuss how the development of distributed and Big Data management technologies has affected the analysis of large datasets of biological sequences. Moreover, we show how the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.01566","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HuJydIVhFq/2Q1tUAMx9MeO4+yJ4zSJRVf0Ye3fND0WjO/OwyZ8i2bxXMLKQdX+KMMa1zMTNGDw5NyVJHpGSBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T20:39:45.881187Z"},"content_sha256":"dfae412b6facdfed63f311fa4f18928e2223e1cc00f32f525c4c563b7d25affb","schema_version":"1.0","event_id":"sha256:dfae412b6facdfed63f311fa4f18928e2223e1cc00f32f525c4c563b7d25affb"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/bundle.json","state_url":"https://pith.science/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T20:39:45Z","links":{"resolver":"https://pith.science/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT","bundle":"https://pith.science/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/bundle.json","state":"https://pith.science/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LHRAR6AG5HZIUORJ5OH6OBVLXT/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:LHRAR6AG5HZIUORJ5OH6OBVLXT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5024b308b116b17f176cf44fac7611d5a518230d463702d777bf2e847ce7d04a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-07-04T13:23:09Z","title_canon_sha256":"90d0d92fc01ce56e37fca0252c2345e2fd1dc2496c2a21c69756809590b7dec1"},"schema_version":"1.0","source":{"id":"1807.01566","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1807.01566","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"arxiv_version","alias_value":"1807.01566v1","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.01566","created_at":"2026-05-18T00:11:29Z"},{"alias_kind":"pith_short_12","alias_value":"LHRAR6AG5HZI","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_16","alias_value":"LHRAR6AG5HZIUORJ","created_at":"2026-05-18T12:32:37Z"},{"alias_kind":"pith_short_8","alias_value":"LHRAR6AG","created_at":"2026-05-18T12:32:37Z"}],"graph_snapshots":[{"event_id":"sha256:dfae412b6facdfed63f311fa4f18928e2223e1cc00f32f525c4c563b7d25affb","target":"graph","created_at":"2026-05-18T00:11:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Distributed approaches based on the map-reduce programming paradigm have started to be proposed in the bioinformatics domain, due to the large amount of data produced by the next-generation sequencing techniques. However, the use of map-reduce and related Big Data technologies and frameworks (e.g., Apache Hadoop and Spark) does not necessarily produce satisfactory results, in terms of both efficiency and effectiveness. We discuss how the development of distributed and Big Data management technologies has affected the analysis of large datasets of biological sequences. Moreover, we show how the","authors_text":"Giuseppe Cattaneo, Mara Sorella, Raffaele Giancarlo, Simona Rombo, Umberto Ferraro Petrillo","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-07-04T13:23:09Z","title":"Analyzing Big Datasets of Genomic Sequences: Fast and Scalable Collection of k-mer Statistics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.01566","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:aee3eb187b0919e4abdffd9e7230e33005cfb5a36e79ddf10b741a44f2f681da","target":"record","created_at":"2026-05-18T00:11:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5024b308b116b17f176cf44fac7611d5a518230d463702d777bf2e847ce7d04a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2018-07-04T13:23:09Z","title_canon_sha256":"90d0d92fc01ce56e37fca0252c2345e2fd1dc2496c2a21c69756809590b7dec1"},"schema_version":"1.0","source":{"id":"1807.01566","kind":"arxiv","version":1}},"canonical_sha256":"59e208f806e9f28a3a29eb8fe706abbce11fde3fb6e90b3c8822d64640691f1d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"59e208f806e9f28a3a29eb8fe706abbce11fde3fb6e90b3c8822d64640691f1d","first_computed_at":"2026-05-18T00:11:29.468638Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:11:29.468638Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"j4ov5HFpweGpN0J9eo+aCPsoBNHXlsf31M66K42E43NytOY+DHHJzY4rrZ+0Is89KUVgds6wVnnVd1GwPxcpDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:11:29.469230Z","signed_message":"canonical_sha256_bytes"},"source_id":"1807.01566","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:aee3eb187b0919e4abdffd9e7230e33005cfb5a36e79ddf10b741a44f2f681da","sha256:dfae412b6facdfed63f311fa4f18928e2223e1cc00f32f525c4c563b7d25affb"],"state_sha256":"9d7c50190f57bc0c46ad93c3f10689663dce60212d3684ebe6db21455b9cd5ec"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xGlfS7DeEeOnVunfTcqyMSBaxmCD4L4WIU7bzN6tsfhztlK7wkV/joj+mBfHVsaoUsCWkJgBpxKgbxzuqjrnDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T20:39:45.884609Z","bundle_sha256":"a6f24079cb79657602310d35cfb0d373f8be2cc332da0e4a61f1d3f57e86e9f2"}}