{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:HFDC32CYMBQPQSK6WYTO6WR6PJ","short_pith_number":"pith:HFDC32CY","schema_version":"1.0","canonical_sha256":"39462de8586060f8495eb626ef5a3e7a5c5e64aef70d4a2bcdfb0f3f30e38efd","source":{"kind":"arxiv","id":"1603.02754","version":3},"attestation_state":"computed","paper":{"title":"XGBoost: A Scalable Tree Boosting System","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Carlos Guestrin, Tianqi Chen","submitted_at":"2016-03-09T01:11:51Z","abstract_excerpt":"Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examp"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1603.02754","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-09T01:11:51Z","cross_cats_sorted":[],"title_canon_sha256":"f9cbc13a4291faaaa2196482a52a766dac154fdeb699fc281fe2eedddef6524a","abstract_canon_sha256":"2820ded071a737dcf6dcac1ede35ae14df0e2313ab2270f830665f1736cee980"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:12:34.408388Z","signature_b64":"cvYq8usfXY9gg5cw+PtcF5X0cnFN2tk8yfPM8tsO5kSpqgUlfulvGKMnRYZW7xnAuNGtgc6VFAocaxJqwyAhAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"39462de8586060f8495eb626ef5a3e7a5c5e64aef70d4a2bcdfb0f3f30e38efd","last_reissued_at":"2026-05-18T01:12:34.407775Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:12:34.407775Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"XGBoost: A Scalable Tree Boosting System","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Carlos Guestrin, Tianqi Chen","submitted_at":"2016-03-09T01:11:51Z","abstract_excerpt":"Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called XGBoost, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, XGBoost scales beyond billions of examp"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.02754","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1603.02754","created_at":"2026-05-18T01:12:34.407860+00:00"},{"alias_kind":"arxiv_version","alias_value":"1603.02754v3","created_at":"2026-05-18T01:12:34.407860+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.02754","created_at":"2026-05-18T01:12:34.407860+00:00"},{"alias_kind":"pith_short_12","alias_value":"HFDC32CYMBQP","created_at":"2026-05-18T12:30:19.053100+00:00"},{"alias_kind":"pith_short_16","alias_value":"HFDC32CYMBQPQSK6","created_at":"2026-05-18T12:30:19.053100+00:00"},{"alias_kind":"pith_short_8","alias_value":"HFDC32CY","created_at":"2026-05-18T12:30:19.053100+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":25,"internal_anchor_count":15,"sample":[{"citing_arxiv_id":"2605.22858","citing_title":"Classification of IED-free EEG Responses for Assisted Epilepsy Diagnosis","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2408.00885","citing_title":"A Perfect Storm: First-Nature Geography and Economic Development","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2502.06064","citing_title":"Inclusive Search for Anomalous Single-Photon Production in MicroBooNE","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2601.03883","citing_title":"Filtering Interlopers with Photometry and Diagnostic Features: A Machine Learning Framework Validated with CSST Slitless Spectroscopy","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19059","citing_title":"VarWISE: Infrared Variability via NEOWISE Single Exposure Photometry","ref_index":48,"is_internal_anchor":true},{"citing_arxiv_id":"2507.00921","citing_title":"Measurement of charged-current muon neutrino-argon interactions without pions in the final state using the MicroBooNE detector","ref_index":61,"is_internal_anchor":true},{"citing_arxiv_id":"2507.03495","citing_title":"Study of Higgs boson pair production in the $HH \\rightarrow b \\overline{b} \\gamma \\gamma$ final state with 308 fb$^{-1}$ of data collected at $\\sqrt{s} =$ 13 TeV and 13.6 TeV by the ATLAS experiment","ref_index":95,"is_internal_anchor":true},{"citing_arxiv_id":"2509.14070","citing_title":"Observation of $W^{+}W^{-}\\gamma$ production in $pp$ collisions at $\\sqrt{s}$ = 13 TeV with the ATLAS detector and constraints on anomalous quartic gauge-boson couplings","ref_index":66,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24954","citing_title":"Stellar flare detection in XMM-Newton with gradient boosted trees","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2511.02619","citing_title":"Search for $K_{\\mathrm{S(L)}}^{0} \\rightarrow \\pi^{+}\\pi^{-}\\mu^{+}\\mu^{-}$ decays at LHCb","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2601.13640","citing_title":"Enhanced sensitivity to the $H \\to Z\\gamma \\to \\ell^+\\ell^-\\gamma$ decay at the LHC using machine learning and novel kinematic observables","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2603.24602","citing_title":"MuViS: Multimodal Virtual Sensing Benchmark","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2603.29076","citing_title":"Probing Heavy Neutral Higgs Bosons via Single Vector-Like Bottom Quark Production at the HL-LHC","ref_index":84,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13180","citing_title":"Prospects for Measuring $H\\to \\rm{invisble}$ at the FCCee","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13614","citing_title":"Search for pair production of additional neutral scalars within the Inert Doublet Model in a final state with two electrons or two muons in proton-proton collisions at $\\sqrt{s}$ = 13 TeV and 13.6 TeV","ref_index":87,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24152","citing_title":"Probing the electron Yukawa coupling via resonant Higgs boson production at FCC-ee via $e^+e^- \\to H \\to WW^*$ in lepton-plus-jets final states","ref_index":61,"is_internal_anchor":false},{"citing_arxiv_id":"2604.23166","citing_title":"A satellite foundation model for improved wealth monitoring","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04348","citing_title":"Observation of the charmless purely baryonic decay $\\mathinner{\\mathit{\\Lambda}^0_b\\!\\to \\mathit{\\Lambda} p \\overline{p}}$","ref_index":32,"is_internal_anchor":false},{"citing_arxiv_id":"2604.11232","citing_title":"Machine Learning Study on Single Production of a Singlet Vector-like Lepton at the Large Hadron Collider","ref_index":47,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09280","citing_title":"AMO-ENE: Attention-based Multi-Omics Fusion Model for Outcome Prediction in Extra Nodal Extension and HPV-associated Oropharyngeal Cancer","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08021","citing_title":"SynQL: A Controllable and Scalable Rule-Based Framework for SQL Workload Synthesis for Performance Benchmarking","ref_index":29,"is_internal_anchor":false},{"citing_arxiv_id":"2604.06326","citing_title":"Exotic Higgs Decays at a Muon Collider","ref_index":95,"is_internal_anchor":false},{"citing_arxiv_id":"2604.12109","citing_title":"Identifying Changing-Look AGN Transitions in Light Curve Data with the Zwicky Transient Facility","ref_index":15,"is_internal_anchor":false},{"citing_arxiv_id":"2604.18329","citing_title":"From Gaia to GaiaNIR: II. A new view of the Milky Way bar","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2604.18910","citing_title":"Predicting Redshift in Seyfert Galaxies Using Machine Learning","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ","json":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ.json","graph_json":"https://pith.science/api/pith-number/HFDC32CYMBQPQSK6WYTO6WR6PJ/graph.json","events_json":"https://pith.science/api/pith-number/HFDC32CYMBQPQSK6WYTO6WR6PJ/events.json","paper":"https://pith.science/paper/HFDC32CY"},"agent_actions":{"view_html":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ","download_json":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ.json","view_paper":"https://pith.science/paper/HFDC32CY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1603.02754&json=true","fetch_graph":"https://pith.science/api/pith-number/HFDC32CYMBQPQSK6WYTO6WR6PJ/graph.json","fetch_events":"https://pith.science/api/pith-number/HFDC32CYMBQPQSK6WYTO6WR6PJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ/action/storage_attestation","attest_author":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ/action/author_attestation","sign_citation":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ/action/citation_signature","submit_replication":"https://pith.science/pith/HFDC32CYMBQPQSK6WYTO6WR6PJ/action/replication_record"}},"created_at":"2026-05-18T01:12:34.407860+00:00","updated_at":"2026-05-18T01:12:34.407860+00:00"}