{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:CG27KS5FVOGP2O4Y4NHHYIT7BB","short_pith_number":"pith:CG27KS5F","schema_version":"1.0","canonical_sha256":"11b5f54ba5ab8cfd3b98e34e7c227f08749348dbe7999e709629ab102fcbe588","source":{"kind":"arxiv","id":"2509.20241","version":1},"attestation_state":"computed","paper":{"title":"Energy Use of AI Inference: Efficiency Pathways and Test-Time Compute","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.LG","authors_text":"Allen Kim, Amy Luers, Esha Choukse, Felipe Oviedo, Fiodar Kazhamiaka, Juan M. Lavista Ferres, Melanie Nakagawa, Ricardo Bianchini","submitted_at":"2025-09-24T15:32:01Z","abstract_excerpt":"As AI inference scales to billions of queries and emerging reasoning and agentic workflows increase token demand, reliable estimates of per-query energy use are increasingly important for capacity planning, emissions accounting, and efficiency prioritization. Many public estimates are inconsistent and overstate energy use, because they extrapolate from limited benchmarks and fail to reflect efficiency gains achievable at scale. In this perspective, we introduce a bottom-up methodology to estimate the per-query energy of large-scale LLM systems based on token throughput. For models running on a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.20241","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-24T15:32:01Z","cross_cats_sorted":["cs.DC"],"title_canon_sha256":"758f3df14694844a653378aa99dd9657db4ae1cc0ecaa525f3e2ad6b045e44a0","abstract_canon_sha256":"2a1aa124638f0cd0a51b0753329ad39051fd313d25348c6a0e8e3525e8f3285d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:24.518550Z","signature_b64":"2vfdKKM2k/dKUqkiiM6efg68x730n+mMaHTOHtS2Hvpu7cDjcR06K6HtNu5kG+PZLDPZ6F53mzoy2Z8N8FQlDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"11b5f54ba5ab8cfd3b98e34e7c227f08749348dbe7999e709629ab102fcbe588","last_reissued_at":"2026-05-20T00:00:24.517835Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:24.517835Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Energy Use of AI Inference: Efficiency Pathways and Test-Time Compute","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.LG","authors_text":"Allen Kim, Amy Luers, Esha Choukse, Felipe Oviedo, Fiodar Kazhamiaka, Juan M. Lavista Ferres, Melanie Nakagawa, Ricardo Bianchini","submitted_at":"2025-09-24T15:32:01Z","abstract_excerpt":"As AI inference scales to billions of queries and emerging reasoning and agentic workflows increase token demand, reliable estimates of per-query energy use are increasingly important for capacity planning, emissions accounting, and efficiency prioritization. Many public estimates are inconsistent and overstate energy use, because they extrapolate from limited benchmarks and fail to reflect efficiency gains achievable at scale. In this perspective, we introduce a bottom-up methodology to estimate the per-query energy of large-scale LLM systems based on token throughput. For models running on a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.20241","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.20241/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.20241","created_at":"2026-05-20T00:00:24.517943+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.20241v1","created_at":"2026-05-20T00:00:24.517943+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.20241","created_at":"2026-05-20T00:00:24.517943+00:00"},{"alias_kind":"pith_short_12","alias_value":"CG27KS5FVOGP","created_at":"2026-05-20T00:00:24.517943+00:00"},{"alias_kind":"pith_short_16","alias_value":"CG27KS5FVOGP2O4Y","created_at":"2026-05-20T00:00:24.517943+00:00"},{"alias_kind":"pith_short_8","alias_value":"CG27KS5F","created_at":"2026-05-20T00:00:24.517943+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.22883","citing_title":"Energy per Successful Goal: Goal-Level Energy Accounting for Agentic AI Systems","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2511.07885","citing_title":"Intelligence per Watt: Measuring Intelligence Efficiency of Local AI","ref_index":3,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB","json":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB.json","graph_json":"https://pith.science/api/pith-number/CG27KS5FVOGP2O4Y4NHHYIT7BB/graph.json","events_json":"https://pith.science/api/pith-number/CG27KS5FVOGP2O4Y4NHHYIT7BB/events.json","paper":"https://pith.science/paper/CG27KS5F"},"agent_actions":{"view_html":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB","download_json":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB.json","view_paper":"https://pith.science/paper/CG27KS5F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.20241&json=true","fetch_graph":"https://pith.science/api/pith-number/CG27KS5FVOGP2O4Y4NHHYIT7BB/graph.json","fetch_events":"https://pith.science/api/pith-number/CG27KS5FVOGP2O4Y4NHHYIT7BB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB/action/storage_attestation","attest_author":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB/action/author_attestation","sign_citation":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB/action/citation_signature","submit_replication":"https://pith.science/pith/CG27KS5FVOGP2O4Y4NHHYIT7BB/action/replication_record"}},"created_at":"2026-05-20T00:00:24.517943+00:00","updated_at":"2026-05-20T00:00:24.517943+00:00"}