{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:KNL72MFHWEHMKKXYLMAVS2EGYB","short_pith_number":"pith:KNL72MFH","schema_version":"1.0","canonical_sha256":"5357fd30a7b10ec52af85b01596886c07bc964b75de0198f092cf0501c2278e9","source":{"kind":"arxiv","id":"1511.08327","version":2},"attestation_state":"computed","paper":{"title":"Random Forests for Big Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.ST","stat.TH"],"primary_cat":"stat.ML","authors_text":"Christine Tuleau-Malot (JAD), Jean-Michel Poggi (UPD5, LM-Orsay), Nathalie Villa-Vialaneix (MIAT INRA), Robin Genuer (ISPED, SISTM)","submitted_at":"2015-11-26T09:04:47Z","abstract_excerpt":"Big Data is one of the major challenges of statistical science and has numerous consequences from  algorithmic and theoretical viewpoints. Big Data always involve massive data but they also often include online data and data heterogeneity. Recently some statistical methods have been adapted to process Big Data, like linear regression models, clustering methods and bootstrapping schemes. Based on decision trees combined with aggregation and bootstrap ideas, random forests were introduced by Breiman in 2001. They are a powerful nonparametric statistical method allowing to consider in a single an"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.08327","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-11-26T09:04:47Z","cross_cats_sorted":["cs.LG","math.ST","stat.TH"],"title_canon_sha256":"991b34c8445c769962c9e8a15018f282a71dd542c114451103870a8e14663ffd","abstract_canon_sha256":"9b3125af9e90299583098391ea37cf16cdbb88955c683efec1d5d2023ffd9b62"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:48:09.988230Z","signature_b64":"JRcysS4VsoI0GNQx+hdGjyXkNIUJiSBm9juvVrBfCV7xaPJigs/+NfNzZ3/Wno0ldwv4aQCuIK4wOONvpKpiDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5357fd30a7b10ec52af85b01596886c07bc964b75de0198f092cf0501c2278e9","last_reissued_at":"2026-05-18T00:48:09.987752Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:48:09.987752Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Random Forests for Big Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","math.ST","stat.TH"],"primary_cat":"stat.ML","authors_text":"Christine Tuleau-Malot (JAD), Jean-Michel Poggi (UPD5, LM-Orsay), Nathalie Villa-Vialaneix (MIAT INRA), Robin Genuer (ISPED, SISTM)","submitted_at":"2015-11-26T09:04:47Z","abstract_excerpt":"Big Data is one of the major challenges of statistical science and has numerous consequences from  algorithmic and theoretical viewpoints. Big Data always involve massive data but they also often include online data and data heterogeneity. Recently some statistical methods have been adapted to process Big Data, like linear regression models, clustering methods and bootstrapping schemes. Based on decision trees combined with aggregation and bootstrap ideas, random forests were introduced by Breiman in 2001. They are a powerful nonparametric statistical method allowing to consider in a single an"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.08327","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.08327","created_at":"2026-05-18T00:48:09.987834+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.08327v2","created_at":"2026-05-18T00:48:09.987834+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.08327","created_at":"2026-05-18T00:48:09.987834+00:00"},{"alias_kind":"pith_short_12","alias_value":"KNL72MFHWEHM","created_at":"2026-05-18T12:29:29.992203+00:00"},{"alias_kind":"pith_short_16","alias_value":"KNL72MFHWEHMKKXY","created_at":"2026-05-18T12:29:29.992203+00:00"},{"alias_kind":"pith_short_8","alias_value":"KNL72MFH","created_at":"2026-05-18T12:29:29.992203+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB","json":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB.json","graph_json":"https://pith.science/api/pith-number/KNL72MFHWEHMKKXYLMAVS2EGYB/graph.json","events_json":"https://pith.science/api/pith-number/KNL72MFHWEHMKKXYLMAVS2EGYB/events.json","paper":"https://pith.science/paper/KNL72MFH"},"agent_actions":{"view_html":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB","download_json":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB.json","view_paper":"https://pith.science/paper/KNL72MFH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.08327&json=true","fetch_graph":"https://pith.science/api/pith-number/KNL72MFHWEHMKKXYLMAVS2EGYB/graph.json","fetch_events":"https://pith.science/api/pith-number/KNL72MFHWEHMKKXYLMAVS2EGYB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB/action/storage_attestation","attest_author":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB/action/author_attestation","sign_citation":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB/action/citation_signature","submit_replication":"https://pith.science/pith/KNL72MFHWEHMKKXYLMAVS2EGYB/action/replication_record"}},"created_at":"2026-05-18T00:48:09.987834+00:00","updated_at":"2026-05-18T00:48:09.987834+00:00"}