{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:FHPN6PIUTIZ72DNJ6WAUUQRAK6","short_pith_number":"pith:FHPN6PIU","schema_version":"1.0","canonical_sha256":"29dedf3d149a33fd0da9f5814a422057bf4df4d6cc9a502e4cd9cca979605b08","source":{"kind":"arxiv","id":"2511.21285","version":3},"attestation_state":"computed","paper":{"title":"PEFT-Bench: A Parameter-Efficient Fine-Tuning Methods Benchmark","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PEFT-Bench offers a standardized way to compare parameter-efficient fine-tuning methods for large language models while factoring in training and inference costs.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Branislav Pecher, Ivan Srba, Maria Bielikova, Robert Belanec","submitted_at":"2025-11-26T11:18:06Z","abstract_excerpt":"Despite the state-of-the-art performance of Large Language Models (LLMs) achieved on many tasks, their massive scale often leads to high computational and environmental costs, limiting their accessibility. Parameter-Efficient Fine-Tuning (PEFT) methods address this challenge by reducing the number of trainable parameters while maintaining strong downstream performance. Despite the advances in PEFT methods, current evaluations remain limited (in terms of evaluated models and datasets) and difficult to reproduce. To bridge this gap, we introduce PEFT-Bench, a unified end-to-end benchmark for eva"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2511.21285","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-11-26T11:18:06Z","cross_cats_sorted":[],"title_canon_sha256":"c89f7bef98f8708346256ccd3fac07f49c8b339380cc8a53a8b22757b620fc58","abstract_canon_sha256":"061f942babfc7a00a8c10427f503abb2c2932895185c7def1112ace880ad0707"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:10:11.766080Z","signature_b64":"bt0axwBYZc468w8iRKn3/hECqc2Qz3CJlk+yuBGtepPfjEIyN+SOB+hUZruSPRzTTdAJEoASdoaZdO7JozTwCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"29dedf3d149a33fd0da9f5814a422057bf4df4d6cc9a502e4cd9cca979605b08","last_reissued_at":"2026-05-18T03:10:11.765326Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:10:11.765326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PEFT-Bench: A Parameter-Efficient Fine-Tuning Methods Benchmark","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PEFT-Bench offers a standardized way to compare parameter-efficient fine-tuning methods for large language models while factoring in training and inference costs.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Branislav Pecher, Ivan Srba, Maria Bielikova, Robert Belanec","submitted_at":"2025-11-26T11:18:06Z","abstract_excerpt":"Despite the state-of-the-art performance of Large Language Models (LLMs) achieved on many tasks, their massive scale often leads to high computational and environmental costs, limiting their accessibility. Parameter-Efficient Fine-Tuning (PEFT) methods address this challenge by reducing the number of trainable parameters while maintaining strong downstream performance. Despite the advances in PEFT methods, current evaluations remain limited (in terms of evaluated models and datasets) and difficult to reproduce. To bridge this gap, we introduce PEFT-Bench, a unified end-to-end benchmark for eva"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We introduce PEFT-Bench, a unified end-to-end benchmark for evaluating diverse PEFT methods on autoregressive LLMs. We demonstrate its usage across 27 NLP datasets and 7 PEFT methods. To account for different PEFT training and inference factors, we also introduce the PEFT Soft Cost Penalties (PSCP) metric, which takes trainable parameters, inference speed, and training memory usage into account.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the chosen 27 NLP datasets and 7 PEFT methods form a sufficiently representative sample to support general conclusions about PEFT method quality and that the PSCP weighting of cost factors produces practically useful rankings.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PEFT-Bench is a standardized end-to-end benchmark for 7 PEFT methods across 27 NLP datasets on autoregressive LLMs, accompanied by the PSCP metric that penalizes based on trainable parameters, inference speed, and training memory.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"PEFT-Bench offers a standardized way to compare parameter-efficient fine-tuning methods for large language models while factoring in training and inference costs.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"1201eb2c1c41cc2e89ec4fc389b20982b32916f4159635ed9dbb82ec3e44a45c"},"source":{"id":"2511.21285","kind":"arxiv","version":3},"verdict":{"id":"8c4611c8-c32f-4ac3-a08e-90041de4196e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T05:04:49.167491Z","strongest_claim":"We introduce PEFT-Bench, a unified end-to-end benchmark for evaluating diverse PEFT methods on autoregressive LLMs. We demonstrate its usage across 27 NLP datasets and 7 PEFT methods. To account for different PEFT training and inference factors, we also introduce the PEFT Soft Cost Penalties (PSCP) metric, which takes trainable parameters, inference speed, and training memory usage into account.","one_line_summary":"PEFT-Bench is a standardized end-to-end benchmark for 7 PEFT methods across 27 NLP datasets on autoregressive LLMs, accompanied by the PSCP metric that penalizes based on trainable parameters, inference speed, and training memory.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the chosen 27 NLP datasets and 7 PEFT methods form a sufficiently representative sample to support general conclusions about PEFT method quality and that the PSCP weighting of cost factors produces practically useful rankings.","pith_extraction_headline":"PEFT-Bench offers a standardized way to compare parameter-efficient fine-tuning methods for large language models while factoring in training and inference costs."},"references":{"count":71,"sample":[{"doi":"","year":null,"title":"online\" 'onlinestring :=","work_id":"38c07273-5071-4bbb-b2af-067af11becc7","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"write newline","work_id":"b6ace3ad-bd44-4a95-86ee-c7b73bd4cda2","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","ref_index":3,"cited_arxiv_id":"2303.08774","is_internal_anchor":true},{"doi":"10.18653/v1/n19-1245","year":2019,"title":"M ath QA : Towards interpretable math word problem solving with operation-based formalisms","work_id":"b73d762e-5f33-41a0-a44d-b2e613bffd36","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/2022.emnlp-main.446","year":2022,"title":"Akari Asai, Mohammadreza Salehi, Matthew Peters, and Hannaneh Hajishirzi. 2022. https://doi.org/10.18653/v1/2022.emnlp-main.446 ATTEMPT : Parameter-efficient multi-task tuning via attentional mixtures","work_id":"975310f8-a8ce-4272-95b4-5eb92ddaa3f7","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":71,"snapshot_sha256":"81af34d9d089004496e69c7c7cfaa2b226c7791223bfe9d1e3a3182189f440f9","internal_anchors":12},"formal_canon":{"evidence_count":1,"snapshot_sha256":"630de9fec1d062469a531b2751d0d05fa1d47b41e54d2c7085b5989d023d93e9"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.21285","created_at":"2026-05-18T03:10:11.765454+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.21285v3","created_at":"2026-05-18T03:10:11.765454+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.21285","created_at":"2026-05-18T03:10:11.765454+00:00"},{"alias_kind":"pith_short_12","alias_value":"FHPN6PIUTIZ7","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"FHPN6PIUTIZ72DNJ","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"FHPN6PIU","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2512.02764","citing_title":"PEFT-Factory: Unified Parameter-Efficient Fine-Tuning of Autoregressive Large Language Models","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08956","citing_title":"Low-Data Supervised Adaptation Outperforms Prompting for Cloud Segmentation Under Domain Shift","ref_index":13,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6","json":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6.json","graph_json":"https://pith.science/api/pith-number/FHPN6PIUTIZ72DNJ6WAUUQRAK6/graph.json","events_json":"https://pith.science/api/pith-number/FHPN6PIUTIZ72DNJ6WAUUQRAK6/events.json","paper":"https://pith.science/paper/FHPN6PIU"},"agent_actions":{"view_html":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6","download_json":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6.json","view_paper":"https://pith.science/paper/FHPN6PIU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.21285&json=true","fetch_graph":"https://pith.science/api/pith-number/FHPN6PIUTIZ72DNJ6WAUUQRAK6/graph.json","fetch_events":"https://pith.science/api/pith-number/FHPN6PIUTIZ72DNJ6WAUUQRAK6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6/action/storage_attestation","attest_author":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6/action/author_attestation","sign_citation":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6/action/citation_signature","submit_replication":"https://pith.science/pith/FHPN6PIUTIZ72DNJ6WAUUQRAK6/action/replication_record"}},"created_at":"2026-05-18T03:10:11.765454+00:00","updated_at":"2026-05-18T03:10:11.765454+00:00"}