{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LDXBTQW3ZTGBJ72FGEUKCPB7JK","short_pith_number":"pith:LDXBTQW3","schema_version":"1.0","canonical_sha256":"58ee19c2dbcccc14ff453128a13c3f4aa120d077a926d069ebaa057df479cedd","source":{"kind":"arxiv","id":"2606.17104","version":1},"attestation_state":"computed","paper":{"title":"Prefill/Decode-Aware Evaluation of LLM Inference on Emerging AI Accelerators","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.DC"],"primary_cat":"cs.AR","authors_text":"E. Wes Bethel, Shun Usami, Venkatram Vishwanath","submitted_at":"2026-06-14T12:23:29Z","abstract_excerpt":"As large language models (LLMs) are increasingly deployed in latency- and cost-sensitive settings, inference efficiency has become a central systems challenge. While GPUs dominate current deployments, a growing number of AI accelerators claim advantages for LLM inference, yet it remains unclear under which conditions such accelerators outperform GPUs in practice. Recent inference systems decompose execution into Prefill and Decode phases, which exhibit distinct computational characteristics and latency metrics, commonly captured by time to first token (TTFT) and time per output token (TPOT).\n "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.17104","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AR","submitted_at":"2026-06-14T12:23:29Z","cross_cats_sorted":["cs.AI","cs.DC"],"title_canon_sha256":"86000184777d0a267a86330aa24c8362a123a32df9223f934f8e5bcd61cc8d22","abstract_canon_sha256":"35bd366a09e93eeca91fe556339e99c34eab65a9fe019f46cd085c916ccf99d2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:03.240755Z","signature_b64":"u0IwMYhvAKiCEUghszrWVDgUR+tAdkDVS2sDkA33YE41xj/5EpJaDrGIXi2ixp8pOIGJ0JWdp9fgtkflCqRPBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"58ee19c2dbcccc14ff453128a13c3f4aa120d077a926d069ebaa057df479cedd","last_reissued_at":"2026-06-19T16:10:03.240362Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:03.240362Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Prefill/Decode-Aware Evaluation of LLM Inference on Emerging AI Accelerators","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.DC"],"primary_cat":"cs.AR","authors_text":"E. Wes Bethel, Shun Usami, Venkatram Vishwanath","submitted_at":"2026-06-14T12:23:29Z","abstract_excerpt":"As large language models (LLMs) are increasingly deployed in latency- and cost-sensitive settings, inference efficiency has become a central systems challenge. While GPUs dominate current deployments, a growing number of AI accelerators claim advantages for LLM inference, yet it remains unclear under which conditions such accelerators outperform GPUs in practice. Recent inference systems decompose execution into Prefill and Decode phases, which exhibit distinct computational characteristics and latency metrics, commonly captured by time to first token (TTFT) and time per output token (TPOT).\n "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.17104","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.17104/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.17104","created_at":"2026-06-19T16:10:03.240420+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.17104v1","created_at":"2026-06-19T16:10:03.240420+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.17104","created_at":"2026-06-19T16:10:03.240420+00:00"},{"alias_kind":"pith_short_12","alias_value":"LDXBTQW3ZTGB","created_at":"2026-06-19T16:10:03.240420+00:00"},{"alias_kind":"pith_short_16","alias_value":"LDXBTQW3ZTGBJ72F","created_at":"2026-06-19T16:10:03.240420+00:00"},{"alias_kind":"pith_short_8","alias_value":"LDXBTQW3","created_at":"2026-06-19T16:10:03.240420+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK","json":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK.json","graph_json":"https://pith.science/api/pith-number/LDXBTQW3ZTGBJ72FGEUKCPB7JK/graph.json","events_json":"https://pith.science/api/pith-number/LDXBTQW3ZTGBJ72FGEUKCPB7JK/events.json","paper":"https://pith.science/paper/LDXBTQW3"},"agent_actions":{"view_html":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK","download_json":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK.json","view_paper":"https://pith.science/paper/LDXBTQW3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.17104&json=true","fetch_graph":"https://pith.science/api/pith-number/LDXBTQW3ZTGBJ72FGEUKCPB7JK/graph.json","fetch_events":"https://pith.science/api/pith-number/LDXBTQW3ZTGBJ72FGEUKCPB7JK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK/action/storage_attestation","attest_author":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK/action/author_attestation","sign_citation":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK/action/citation_signature","submit_replication":"https://pith.science/pith/LDXBTQW3ZTGBJ72FGEUKCPB7JK/action/replication_record"}},"created_at":"2026-06-19T16:10:03.240420+00:00","updated_at":"2026-06-19T16:10:03.240420+00:00"}