{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:4HYE3WKWCCQODG3G5ZXR6VPAN3","short_pith_number":"pith:4HYE3WKW","schema_version":"1.0","canonical_sha256":"e1f04dd95610a0e19b66ee6f1f55e06ed854679dc5391a2c875e0b8bf8395b93","source":{"kind":"arxiv","id":"2606.26453","version":1},"attestation_state":"computed","paper":{"title":"Optimizing CUDA like a Human: Micro-Profiling Tools as Expert Surrogates for LLM-Based GPU Kernel Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bernie Wang, George Karypis, Haoyang Fang, Huzefa Rangwala, Jiading Gai, Jin Huang, Kaj Bostrom, Shuai Zhang, Vihang Patil","submitted_at":"2026-06-24T23:28:09Z","abstract_excerpt":"We present KernelPro, a closed-loop multi-agent system that automatically generates, profiles, and iteratively optimizes GPU kernel code by integrating large language model (LLM) code generation with hardware profiler feedback and pluggable bottleneck detection tools. KernelPro introduces four contributions: (1) a semantic feedback operator that encodes expert heuristics as pluggable micro-profiling tools, transforming raw hardware metrics into actionable natural language guidance; (2) a two-stage tool invocation architecture where roofline-based bottleneck classification filters which special"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.26453","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-24T23:28:09Z","cross_cats_sorted":[],"title_canon_sha256":"0ac31bab232803b8ce3a09230278c8961a639e7931a41b5542b8388bb2b4ae44","abstract_canon_sha256":"49bf208159126277be40270161c128bd1c60d05bd5bcb3679704bf643ffd5e97"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T00:15:41.693081Z","signature_b64":"kdr0bUC5Uuu1yTfsApkwGr4iczqD41Tg3/qjRDfw4+RKb/Yjekrm7a5YWtkFipI/7k71Kt05m1/fb2X+vai3Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e1f04dd95610a0e19b66ee6f1f55e06ed854679dc5391a2c875e0b8bf8395b93","last_reissued_at":"2026-06-26T00:15:41.692680Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T00:15:41.692680Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Optimizing CUDA like a Human: Micro-Profiling Tools as Expert Surrogates for LLM-Based GPU Kernel Optimization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bernie Wang, George Karypis, Haoyang Fang, Huzefa Rangwala, Jiading Gai, Jin Huang, Kaj Bostrom, Shuai Zhang, Vihang Patil","submitted_at":"2026-06-24T23:28:09Z","abstract_excerpt":"We present KernelPro, a closed-loop multi-agent system that automatically generates, profiles, and iteratively optimizes GPU kernel code by integrating large language model (LLM) code generation with hardware profiler feedback and pluggable bottleneck detection tools. KernelPro introduces four contributions: (1) a semantic feedback operator that encodes expert heuristics as pluggable micro-profiling tools, transforming raw hardware metrics into actionable natural language guidance; (2) a two-stage tool invocation architecture where roofline-based bottleneck classification filters which special"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26453","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26453/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.26453","created_at":"2026-06-26T00:15:41.692737+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.26453v1","created_at":"2026-06-26T00:15:41.692737+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26453","created_at":"2026-06-26T00:15:41.692737+00:00"},{"alias_kind":"pith_short_12","alias_value":"4HYE3WKWCCQO","created_at":"2026-06-26T00:15:41.692737+00:00"},{"alias_kind":"pith_short_16","alias_value":"4HYE3WKWCCQODG3G","created_at":"2026-06-26T00:15:41.692737+00:00"},{"alias_kind":"pith_short_8","alias_value":"4HYE3WKW","created_at":"2026-06-26T00:15:41.692737+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3","json":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3.json","graph_json":"https://pith.science/api/pith-number/4HYE3WKWCCQODG3G5ZXR6VPAN3/graph.json","events_json":"https://pith.science/api/pith-number/4HYE3WKWCCQODG3G5ZXR6VPAN3/events.json","paper":"https://pith.science/paper/4HYE3WKW"},"agent_actions":{"view_html":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3","download_json":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3.json","view_paper":"https://pith.science/paper/4HYE3WKW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.26453&json=true","fetch_graph":"https://pith.science/api/pith-number/4HYE3WKWCCQODG3G5ZXR6VPAN3/graph.json","fetch_events":"https://pith.science/api/pith-number/4HYE3WKWCCQODG3G5ZXR6VPAN3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3/action/storage_attestation","attest_author":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3/action/author_attestation","sign_citation":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3/action/citation_signature","submit_replication":"https://pith.science/pith/4HYE3WKWCCQODG3G5ZXR6VPAN3/action/replication_record"}},"created_at":"2026-06-26T00:15:41.692737+00:00","updated_at":"2026-06-26T00:15:41.692737+00:00"}