{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:RA5C2KJUFDKLXJW3C3H4I7OMZG","short_pith_number":"pith:RA5C2KJU","canonical_record":{"source":{"id":"1512.03444","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-12-10T21:20:14Z","cross_cats_sorted":[],"title_canon_sha256":"b6eea4483b2399e1644cd6d51ef4db497ff5fe6e0d78c4f535d7088e9d0e78e7","abstract_canon_sha256":"589040bf23f7ae41e5ba8e8b585a3a22f94a11a810bcbc7627cb5ac3d259a101"},"schema_version":"1.0"},"canonical_sha256":"883a2d293428d4bba6db16cfc47dccc99b445eba0117f3453b68213b82477978","source":{"kind":"arxiv","id":"1512.03444","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.03444","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"arxiv_version","alias_value":"1512.03444v1","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.03444","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"pith_short_12","alias_value":"RA5C2KJUFDKL","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_16","alias_value":"RA5C2KJUFDKLXJW3","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_8","alias_value":"RA5C2KJU","created_at":"2026-05-18T12:29:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:RA5C2KJUFDKLXJW3C3H4I7OMZG","target":"record","payload":{"canonical_record":{"source":{"id":"1512.03444","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-12-10T21:20:14Z","cross_cats_sorted":[],"title_canon_sha256":"b6eea4483b2399e1644cd6d51ef4db497ff5fe6e0d78c4f535d7088e9d0e78e7","abstract_canon_sha256":"589040bf23f7ae41e5ba8e8b585a3a22f94a11a810bcbc7627cb5ac3d259a101"},"schema_version":"1.0"},"canonical_sha256":"883a2d293428d4bba6db16cfc47dccc99b445eba0117f3453b68213b82477978","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:24:32.431428Z","signature_b64":"kFAYqp798WiYxCvWhDNoBSVyuVGTbSOzLucuJy0X60gQTIwEn956rW3DNwdq2QMiZbmakb9LDSPG7ksLv+emCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"883a2d293428d4bba6db16cfc47dccc99b445eba0117f3453b68213b82477978","last_reissued_at":"2026-05-18T01:24:32.430950Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:24:32.430950Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1512.03444","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:24:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zuB2KWHXBic2YOXM592397OX2Iq//Au8RbwaRvLS3wgq3EmZma75i55sg1Um5kqS1X9bzT3GvuFSU8aIvYl6DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T21:59:37.234472Z"},"content_sha256":"0eb1480b27be58c66ff9a4e808bbb0c24c03d293ba6503ca098011c86b5fc742","schema_version":"1.0","event_id":"sha256:0eb1480b27be58c66ff9a4e808bbb0c24c03d293ba6503ca098011c86b5fc742"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:RA5C2KJUFDKLXJW3C3H4I7OMZG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Cross-Validated Variable Selection in Tree-Based Methods Improves Predictive Performance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Amichai Painsky, Saharon Rosset","submitted_at":"2015-12-10T21:20:14Z","abstract_excerpt":"Recursive partitioning approaches producing tree-like models are a long standing staple of predictive modeling, in the last decade mostly as ``sub-learners'' within state of the art ensemble methods like Boosting and Random Forest. However, a fundamental flaw in the partitioning (or splitting) rule of commonly used tree building methods precludes them from treating different types of variables equally. This most clearly manifests in these methods' inability to properly utilize categorical variables with a large number of categories, which are ubiquitous in the new age of big data. Such variabl"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.03444","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:24:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"z6lnorKnEQE1oT9fBNiwAwRDDGc0K3eA88VI5QsJdNTwks1RIOg7h0hR9Ire48RO+UlUxiUWqNj5pvjaAhdMBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T21:59:37.234802Z"},"content_sha256":"0e1c246230ecfb3a3d6cb5cccb91d16c798f3419c115eb556973f3888c9b9751","schema_version":"1.0","event_id":"sha256:0e1c246230ecfb3a3d6cb5cccb91d16c798f3419c115eb556973f3888c9b9751"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/bundle.json","state_url":"https://pith.science/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T21:59:37Z","links":{"resolver":"https://pith.science/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG","bundle":"https://pith.science/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/bundle.json","state":"https://pith.science/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RA5C2KJUFDKLXJW3C3H4I7OMZG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:RA5C2KJUFDKLXJW3C3H4I7OMZG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"589040bf23f7ae41e5ba8e8b585a3a22f94a11a810bcbc7627cb5ac3d259a101","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-12-10T21:20:14Z","title_canon_sha256":"b6eea4483b2399e1644cd6d51ef4db497ff5fe6e0d78c4f535d7088e9d0e78e7"},"schema_version":"1.0","source":{"id":"1512.03444","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.03444","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"arxiv_version","alias_value":"1512.03444v1","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.03444","created_at":"2026-05-18T01:24:32Z"},{"alias_kind":"pith_short_12","alias_value":"RA5C2KJUFDKL","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_16","alias_value":"RA5C2KJUFDKLXJW3","created_at":"2026-05-18T12:29:39Z"},{"alias_kind":"pith_short_8","alias_value":"RA5C2KJU","created_at":"2026-05-18T12:29:39Z"}],"graph_snapshots":[{"event_id":"sha256:0e1c246230ecfb3a3d6cb5cccb91d16c798f3419c115eb556973f3888c9b9751","target":"graph","created_at":"2026-05-18T01:24:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recursive partitioning approaches producing tree-like models are a long standing staple of predictive modeling, in the last decade mostly as ``sub-learners'' within state of the art ensemble methods like Boosting and Random Forest. However, a fundamental flaw in the partitioning (or splitting) rule of commonly used tree building methods precludes them from treating different types of variables equally. This most clearly manifests in these methods' inability to properly utilize categorical variables with a large number of categories, which are ubiquitous in the new age of big data. Such variabl","authors_text":"Amichai Painsky, Saharon Rosset","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-12-10T21:20:14Z","title":"Cross-Validated Variable Selection in Tree-Based Methods Improves Predictive Performance"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.03444","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0eb1480b27be58c66ff9a4e808bbb0c24c03d293ba6503ca098011c86b5fc742","target":"record","created_at":"2026-05-18T01:24:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"589040bf23f7ae41e5ba8e8b585a3a22f94a11a810bcbc7627cb5ac3d259a101","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2015-12-10T21:20:14Z","title_canon_sha256":"b6eea4483b2399e1644cd6d51ef4db497ff5fe6e0d78c4f535d7088e9d0e78e7"},"schema_version":"1.0","source":{"id":"1512.03444","kind":"arxiv","version":1}},"canonical_sha256":"883a2d293428d4bba6db16cfc47dccc99b445eba0117f3453b68213b82477978","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"883a2d293428d4bba6db16cfc47dccc99b445eba0117f3453b68213b82477978","first_computed_at":"2026-05-18T01:24:32.430950Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:24:32.430950Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"kFAYqp798WiYxCvWhDNoBSVyuVGTbSOzLucuJy0X60gQTIwEn956rW3DNwdq2QMiZbmakb9LDSPG7ksLv+emCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:24:32.431428Z","signed_message":"canonical_sha256_bytes"},"source_id":"1512.03444","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0eb1480b27be58c66ff9a4e808bbb0c24c03d293ba6503ca098011c86b5fc742","sha256:0e1c246230ecfb3a3d6cb5cccb91d16c798f3419c115eb556973f3888c9b9751"],"state_sha256":"21ecbba8162f9cfeeef4f98f79ffa61e8cbf28d67684ae1f9a388ef3611b5f02"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"czfHDpAIYRk3ftFlHygETyVbTarI/C6D2awAcaVgdmB3jAZToP5BTgIOn3gmBy1HcN/Cr55OBumR4Rtt2eGTCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T21:59:37.236771Z","bundle_sha256":"0c023fb90c7572119b052f9bc3d793a49e3d4b9d8f761a072c5e96321d302630"}}