{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:5P6HWQQYUQXHDNBQPND6F4MIFF","short_pith_number":"pith:5P6HWQQY","canonical_record":{"source":{"id":"1804.06755","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T14:20:53Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"cedae13d4e761dd99f2b81e016eee61e4f7b53bf7fc0dde6cbd66147d6b85d14","abstract_canon_sha256":"e6cc395995998a0b7f674edd0077f9934618aa8670dba4dae77666be5f5841e2"},"schema_version":"1.0"},"canonical_sha256":"ebfc7b4218a42e71b4307b47e2f188296a39da03b034e08066e0b7b23afaf2c5","source":{"kind":"arxiv","id":"1804.06755","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.06755","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"arxiv_version","alias_value":"1804.06755v1","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.06755","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"pith_short_12","alias_value":"5P6HWQQYUQXH","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"5P6HWQQYUQXHDNBQ","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"5P6HWQQY","created_at":"2026-05-18T12:32:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:5P6HWQQYUQXHDNBQPND6F4MIFF","target":"record","payload":{"canonical_record":{"source":{"id":"1804.06755","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T14:20:53Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"cedae13d4e761dd99f2b81e016eee61e4f7b53bf7fc0dde6cbd66147d6b85d14","abstract_canon_sha256":"e6cc395995998a0b7f674edd0077f9934618aa8670dba4dae77666be5f5841e2"},"schema_version":"1.0"},"canonical_sha256":"ebfc7b4218a42e71b4307b47e2f188296a39da03b034e08066e0b7b23afaf2c5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:18:05.825167Z","signature_b64":"XFbZPWIxIchDb72nacxRKYF0tSQdkSzxCwioAGgY+Wn+wZTCe89zlKzK+6fvT7gLNAFkZpjyOY8U18gyDZqQDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ebfc7b4218a42e71b4307b47e2f188296a39da03b034e08066e0b7b23afaf2c5","last_reissued_at":"2026-05-18T00:18:05.824527Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:18:05.824527Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.06755","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SGHOeWxWYLT2BcExQbWcUrTAeKjm72EqwXj+9AP9YBXYGSLKmEXQhKAxTcHEYyJaAUgUppoMwxzRsuI/eFMjBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:24:49.954576Z"},"content_sha256":"a4225d2de5db9cd460b19d947567b8a356a11ef30c731cfb993e59605d28a8ec","schema_version":"1.0","event_id":"sha256:a4225d2de5db9cd460b19d947567b8a356a11ef30c731cfb993e59605d28a8ec"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:5P6HWQQYUQXHDNBQPND6F4MIFF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exact Distributed Training: Random Forest with Billions of Examples","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Mathieu Guillame-Bert, Olivier Teytaud","submitted_at":"2018-04-18T14:20:53Z","abstract_excerpt":"We introduce an exact distributed algorithm to train Random Forest models as well as other decision forest models without relying on approximating best split search. We explain the proposed algorithm and compare it to related approaches for various complexity measures (time, ram, disk, and network complexity analysis). We report its running performances on artificial and real-world datasets of up to 18 billions examples. This figure is several orders of magnitude larger than datasets tackled in the existing literature. Finally, we empirically show that Random Forest benefits from being trained"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.06755","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m/AYaiU+0qgiE3wUnM64jqCb/fQCAVgTTqJTf+sFZQQUfoZWxKs9WouAz0yeckVXALJZWW8aek3c6zaLeZcJDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T19:24:49.954924Z"},"content_sha256":"751d816c0b1197c8a2d21537a588194e188d2fc84a242b9596400abfeb857bd6","schema_version":"1.0","event_id":"sha256:751d816c0b1197c8a2d21537a588194e188d2fc84a242b9596400abfeb857bd6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/bundle.json","state_url":"https://pith.science/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T19:24:49Z","links":{"resolver":"https://pith.science/pith/5P6HWQQYUQXHDNBQPND6F4MIFF","bundle":"https://pith.science/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/bundle.json","state":"https://pith.science/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5P6HWQQYUQXHDNBQPND6F4MIFF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:5P6HWQQYUQXHDNBQPND6F4MIFF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e6cc395995998a0b7f674edd0077f9934618aa8670dba4dae77666be5f5841e2","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T14:20:53Z","title_canon_sha256":"cedae13d4e761dd99f2b81e016eee61e4f7b53bf7fc0dde6cbd66147d6b85d14"},"schema_version":"1.0","source":{"id":"1804.06755","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.06755","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"arxiv_version","alias_value":"1804.06755v1","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.06755","created_at":"2026-05-18T00:18:05Z"},{"alias_kind":"pith_short_12","alias_value":"5P6HWQQYUQXH","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_16","alias_value":"5P6HWQQYUQXHDNBQ","created_at":"2026-05-18T12:32:08Z"},{"alias_kind":"pith_short_8","alias_value":"5P6HWQQY","created_at":"2026-05-18T12:32:08Z"}],"graph_snapshots":[{"event_id":"sha256:751d816c0b1197c8a2d21537a588194e188d2fc84a242b9596400abfeb857bd6","target":"graph","created_at":"2026-05-18T00:18:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce an exact distributed algorithm to train Random Forest models as well as other decision forest models without relying on approximating best split search. We explain the proposed algorithm and compare it to related approaches for various complexity measures (time, ram, disk, and network complexity analysis). We report its running performances on artificial and real-world datasets of up to 18 billions examples. This figure is several orders of magnitude larger than datasets tackled in the existing literature. Finally, we empirically show that Random Forest benefits from being trained","authors_text":"Mathieu Guillame-Bert, Olivier Teytaud","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T14:20:53Z","title":"Exact Distributed Training: Random Forest with Billions of Examples"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.06755","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a4225d2de5db9cd460b19d947567b8a356a11ef30c731cfb993e59605d28a8ec","target":"record","created_at":"2026-05-18T00:18:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e6cc395995998a0b7f674edd0077f9934618aa8670dba4dae77666be5f5841e2","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-18T14:20:53Z","title_canon_sha256":"cedae13d4e761dd99f2b81e016eee61e4f7b53bf7fc0dde6cbd66147d6b85d14"},"schema_version":"1.0","source":{"id":"1804.06755","kind":"arxiv","version":1}},"canonical_sha256":"ebfc7b4218a42e71b4307b47e2f188296a39da03b034e08066e0b7b23afaf2c5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ebfc7b4218a42e71b4307b47e2f188296a39da03b034e08066e0b7b23afaf2c5","first_computed_at":"2026-05-18T00:18:05.824527Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:18:05.824527Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XFbZPWIxIchDb72nacxRKYF0tSQdkSzxCwioAGgY+Wn+wZTCe89zlKzK+6fvT7gLNAFkZpjyOY8U18gyDZqQDw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:18:05.825167Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.06755","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a4225d2de5db9cd460b19d947567b8a356a11ef30c731cfb993e59605d28a8ec","sha256:751d816c0b1197c8a2d21537a588194e188d2fc84a242b9596400abfeb857bd6"],"state_sha256":"35a2c86aab02bc2c3eb2a5032e1f25f009146de05cb78f7b1abb81334f9ae65c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"d8jc7ivgVmaGBOGQrhtJ4MhCFLpbjJd5/+LqxufkxswlaaX/Wri95KfnFiJqwZO5XYh1pNWSb41Ek6wMD2UpCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T19:24:49.957070Z","bundle_sha256":"b3b7134f10327db8720fdbc7313667bb7255daee6d2494e11ca82b60947ea87d"}}