{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:4O2OLSMQ5Q44WZYRYHTK7C22OX","short_pith_number":"pith:4O2OLSMQ","schema_version":"1.0","canonical_sha256":"e3b4e5c990ec39cb6711c1e6af8b5a75f2cff7cf3a7f8269d91cf3833207b157","source":{"kind":"arxiv","id":"2606.09514","version":1},"attestation_state":"computed","paper":{"title":"BUDDY: BUdget-Driven DYnamic Depth Routing for Adaptive Large Language Model Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aimin Pan, Changhai Zhou, Fei Yang, Mingze Yin, Shaoqi Yu, Shichao Weng, Yuhua Zhou","submitted_at":"2026-06-08T14:06:35Z","abstract_excerpt":"Large language models (LLMs) incur high inference cost due to their depth and parameter scale. Depth pruning can reduce latency by skipping redundant Transformer blocks, but existing methods (i) provide limited control under user-specific compute budgets and (ii) typically fix the routing path, failing to adapt as the context grows during decoding. We propose Buddy, a budget-driven dynamic depth routing framework. Buddy uses a lightweight Decision Module to score intermediate layers conditioned on the input and deterministically executes the top-k layers to satisfy a given budget. To support d"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.09514","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-08T14:06:35Z","cross_cats_sorted":[],"title_canon_sha256":"3f5996b5a7ba47b3da06bf6d7305230cc4d6f12a7a4c49fa073d4ccf63530a75","abstract_canon_sha256":"d53281609073aedead642a6a49d350da994b8ca13799d2ea44ad22674ae0dba2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:08:52.829693Z","signature_b64":"AxpYY6c3/rIexTaL+FN9OrBMaaAU+1JRh2XRWJhcuYGkrUeiVifd+I6fWaZSXQ2iDX5eA7XUY+zWsKpNO+aEDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e3b4e5c990ec39cb6711c1e6af8b5a75f2cff7cf3a7f8269d91cf3833207b157","last_reissued_at":"2026-06-09T02:08:52.828847Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:08:52.828847Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"BUDDY: BUdget-Driven DYnamic Depth Routing for Adaptive Large Language Model Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aimin Pan, Changhai Zhou, Fei Yang, Mingze Yin, Shaoqi Yu, Shichao Weng, Yuhua Zhou","submitted_at":"2026-06-08T14:06:35Z","abstract_excerpt":"Large language models (LLMs) incur high inference cost due to their depth and parameter scale. Depth pruning can reduce latency by skipping redundant Transformer blocks, but existing methods (i) provide limited control under user-specific compute budgets and (ii) typically fix the routing path, failing to adapt as the context grows during decoding. We propose Buddy, a budget-driven dynamic depth routing framework. Buddy uses a lightweight Decision Module to score intermediate layers conditioned on the input and deterministically executes the top-k layers to satisfy a given budget. To support d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.09514","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.09514/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.09514","created_at":"2026-06-09T02:08:52.828974+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.09514v1","created_at":"2026-06-09T02:08:52.828974+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.09514","created_at":"2026-06-09T02:08:52.828974+00:00"},{"alias_kind":"pith_short_12","alias_value":"4O2OLSMQ5Q44","created_at":"2026-06-09T02:08:52.828974+00:00"},{"alias_kind":"pith_short_16","alias_value":"4O2OLSMQ5Q44WZYR","created_at":"2026-06-09T02:08:52.828974+00:00"},{"alias_kind":"pith_short_8","alias_value":"4O2OLSMQ","created_at":"2026-06-09T02:08:52.828974+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX","json":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX.json","graph_json":"https://pith.science/api/pith-number/4O2OLSMQ5Q44WZYRYHTK7C22OX/graph.json","events_json":"https://pith.science/api/pith-number/4O2OLSMQ5Q44WZYRYHTK7C22OX/events.json","paper":"https://pith.science/paper/4O2OLSMQ"},"agent_actions":{"view_html":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX","download_json":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX.json","view_paper":"https://pith.science/paper/4O2OLSMQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.09514&json=true","fetch_graph":"https://pith.science/api/pith-number/4O2OLSMQ5Q44WZYRYHTK7C22OX/graph.json","fetch_events":"https://pith.science/api/pith-number/4O2OLSMQ5Q44WZYRYHTK7C22OX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX/action/storage_attestation","attest_author":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX/action/author_attestation","sign_citation":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX/action/citation_signature","submit_replication":"https://pith.science/pith/4O2OLSMQ5Q44WZYRYHTK7C22OX/action/replication_record"}},"created_at":"2026-06-09T02:08:52.828974+00:00","updated_at":"2026-06-09T02:08:52.828974+00:00"}