{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:CI2ZWZYYODFE7KTP5PFKV74ZJY","short_pith_number":"pith:CI2ZWZYY","schema_version":"1.0","canonical_sha256":"12359b671870ca4faa6febcaaaff994e1fd5ad0e1d992e1f8fd4f91c0e8bb0d5","source":{"kind":"arxiv","id":"2507.12415","version":2},"attestation_state":"computed","paper":{"title":"SWE-Perf: Can Language Models Optimize Code Performance on Real-World Repositories?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Lin Yan, Mingzhe Du, Qian Liu, Xinyi He, Yiming Huang, Yin Zheng, Zejian Yuan, Zejun Ma, Zhijie Fan","submitted_at":"2025-07-16T17:05:17Z","abstract_excerpt":"Code performance optimization is paramount in real-world software engineering and critical for production-level systems. While Large Language Models (LLMs) have demonstrated impressive capabilities in code generation and bug fixing, their proficiency in enhancing code performance at the repository level remains largely unexplored. To address this gap, we introduce SWE-Perf, the first benchmark specifically designed to systematically evaluate LLMs on code performance optimization tasks within authentic repository contexts. SWE-Perf comprises 140 carefully curated instances, each derived from pe"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2507.12415","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2025-07-16T17:05:17Z","cross_cats_sorted":[],"title_canon_sha256":"62f982fa34acf37aed8884ddbff8a6d4406cc1d5d3f760b7a0137398905016a5","abstract_canon_sha256":"b3b53e4693b5bcf76041a4edf4a4f53099f7fc70b05c04be476a0de3941bc28b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:17:13.205729Z","signature_b64":"nfwDrcmpyIhXyH8tLq+7o8GHu059xD9X2eCLKtXIXNJjJoDWRDeXtVfevz00hJ3thoIVQXVA2lf9wFnxmcCsAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"12359b671870ca4faa6febcaaaff994e1fd5ad0e1d992e1f8fd4f91c0e8bb0d5","last_reissued_at":"2026-07-02T01:17:13.205206Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:17:13.205206Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SWE-Perf: Can Language Models Optimize Code Performance on Real-World Repositories?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Lin Yan, Mingzhe Du, Qian Liu, Xinyi He, Yiming Huang, Yin Zheng, Zejian Yuan, Zejun Ma, Zhijie Fan","submitted_at":"2025-07-16T17:05:17Z","abstract_excerpt":"Code performance optimization is paramount in real-world software engineering and critical for production-level systems. While Large Language Models (LLMs) have demonstrated impressive capabilities in code generation and bug fixing, their proficiency in enhancing code performance at the repository level remains largely unexplored. To address this gap, we introduce SWE-Perf, the first benchmark specifically designed to systematically evaluate LLMs on code performance optimization tasks within authentic repository contexts. SWE-Perf comprises 140 carefully curated instances, each derived from pe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.12415","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.12415/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2507.12415","created_at":"2026-07-02T01:17:13.205266+00:00"},{"alias_kind":"arxiv_version","alias_value":"2507.12415v2","created_at":"2026-07-02T01:17:13.205266+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.12415","created_at":"2026-07-02T01:17:13.205266+00:00"},{"alias_kind":"pith_short_12","alias_value":"CI2ZWZYYODFE","created_at":"2026-07-02T01:17:13.205266+00:00"},{"alias_kind":"pith_short_16","alias_value":"CI2ZWZYYODFE7KTP","created_at":"2026-07-02T01:17:13.205266+00:00"},{"alias_kind":"pith_short_8","alias_value":"CI2ZWZYY","created_at":"2026-07-02T01:17:13.205266+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":10,"internal_anchor_count":10,"sample":[{"citing_arxiv_id":"2606.31767","citing_title":"JETO-Bench: A Reproducible Benchmark for Execution Time Improvement Patches in Java","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20244","citing_title":"Lean Refactor: Multi-Objective Controllable Proof Optimization via Agentic Strategy Search","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15815","citing_title":"BootstrapAgent: Distilling Repository Setup into Reusable Agent Knowledge","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17526","citing_title":"SaaSBench: Exploring the Boundaries of Coding Agents in Long-Horizon Enterprise SaaS Engineering","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15226","citing_title":"Is Agentic AI Ready for Real-World Hardware Engineering? A Deep Dive with Phoenix-bench","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08366","citing_title":"SWE Atlas: Benchmarking Coding Agents Beyond Issue Resolution","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10890","citing_title":"CppPerf: An Automated Pipeline and Dataset for Performance-Improving C++ Commits","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06068","citing_title":"VibeServe: Can AI Agents Build Bespoke LLM Serving Systems?","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19742","citing_title":"PlayCoder: Making LLM-Generated GUI Code Playable","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05955","citing_title":"Does Pass Rate Tell the Whole Story? Evaluating Design Constraint Compliance in LLM-based Issue Resolution","ref_index":16,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY","json":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY.json","graph_json":"https://pith.science/api/pith-number/CI2ZWZYYODFE7KTP5PFKV74ZJY/graph.json","events_json":"https://pith.science/api/pith-number/CI2ZWZYYODFE7KTP5PFKV74ZJY/events.json","paper":"https://pith.science/paper/CI2ZWZYY"},"agent_actions":{"view_html":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY","download_json":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY.json","view_paper":"https://pith.science/paper/CI2ZWZYY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2507.12415&json=true","fetch_graph":"https://pith.science/api/pith-number/CI2ZWZYYODFE7KTP5PFKV74ZJY/graph.json","fetch_events":"https://pith.science/api/pith-number/CI2ZWZYYODFE7KTP5PFKV74ZJY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY/action/storage_attestation","attest_author":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY/action/author_attestation","sign_citation":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY/action/citation_signature","submit_replication":"https://pith.science/pith/CI2ZWZYYODFE7KTP5PFKV74ZJY/action/replication_record"}},"created_at":"2026-07-02T01:17:13.205266+00:00","updated_at":"2026-07-02T01:17:13.205266+00:00"}