{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3U5PB7E2NAT4XAK7PURWGRI4IG","short_pith_number":"pith:3U5PB7E2","schema_version":"1.0","canonical_sha256":"dd3af0fc9a6827cb815f7d2363451c41b232ab33d42f7facff4b5f09f32952b2","source":{"kind":"arxiv","id":"2605.19537","version":1},"attestation_state":"computed","paper":{"title":"The Silent Hyperparameter: Quantifying the Impact of Inference Backends on LLM Reproducibility","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"David Pape, Jonathan Evertz, Lea Sch\\\"onherr","submitted_at":"2026-05-19T08:37:27Z","abstract_excerpt":"Progress in LLMs is increasingly measured through standardized benchmarks, where state-of-the-art improvements are often separated by fractions of a percentage point. At the same time, the computational cost of evaluating modern LLMs has driven widespread adoption of specialized inference backends, software systems that execute trained models efficiently at inference time. While critical for scalability, system-level optimizations, such as custom CUDA kernels and reduced-precision arithmetic, can alter token probabilities and introduce non-determinism, possibly cascading into divergent generat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19537","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T08:37:27Z","cross_cats_sorted":[],"title_canon_sha256":"ff9736917f6c55fdc03e07ee6ea442330b9705fb5c870dba092cab163d2b3f9c","abstract_canon_sha256":"f7ff3400cc531f8fb3ee1b9a0ba6fc62c4e131c46f19f8bfc99106c27abca6d7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:50.723269Z","signature_b64":"LTsnStQtHX0QHORcLKqAXmJdUx08DgKfuQ9iBJPjN26FB3052nN2bGo0Sv7tPm6VOwtvldrtXUXZQ262iNbPCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dd3af0fc9a6827cb815f7d2363451c41b232ab33d42f7facff4b5f09f32952b2","last_reissued_at":"2026-05-20T01:05:50.722516Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:50.722516Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Silent Hyperparameter: Quantifying the Impact of Inference Backends on LLM Reproducibility","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"David Pape, Jonathan Evertz, Lea Sch\\\"onherr","submitted_at":"2026-05-19T08:37:27Z","abstract_excerpt":"Progress in LLMs is increasingly measured through standardized benchmarks, where state-of-the-art improvements are often separated by fractions of a percentage point. At the same time, the computational cost of evaluating modern LLMs has driven widespread adoption of specialized inference backends, software systems that execute trained models efficiently at inference time. While critical for scalability, system-level optimizations, such as custom CUDA kernels and reduced-precision arithmetic, can alter token probabilities and introduce non-determinism, possibly cascading into divergent generat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19537","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19537/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19537","created_at":"2026-05-20T01:05:50.722649+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19537v1","created_at":"2026-05-20T01:05:50.722649+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19537","created_at":"2026-05-20T01:05:50.722649+00:00"},{"alias_kind":"pith_short_12","alias_value":"3U5PB7E2NAT4","created_at":"2026-05-20T01:05:50.722649+00:00"},{"alias_kind":"pith_short_16","alias_value":"3U5PB7E2NAT4XAK7","created_at":"2026-05-20T01:05:50.722649+00:00"},{"alias_kind":"pith_short_8","alias_value":"3U5PB7E2","created_at":"2026-05-20T01:05:50.722649+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG","json":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG.json","graph_json":"https://pith.science/api/pith-number/3U5PB7E2NAT4XAK7PURWGRI4IG/graph.json","events_json":"https://pith.science/api/pith-number/3U5PB7E2NAT4XAK7PURWGRI4IG/events.json","paper":"https://pith.science/paper/3U5PB7E2"},"agent_actions":{"view_html":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG","download_json":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG.json","view_paper":"https://pith.science/paper/3U5PB7E2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19537&json=true","fetch_graph":"https://pith.science/api/pith-number/3U5PB7E2NAT4XAK7PURWGRI4IG/graph.json","fetch_events":"https://pith.science/api/pith-number/3U5PB7E2NAT4XAK7PURWGRI4IG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG/action/storage_attestation","attest_author":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG/action/author_attestation","sign_citation":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG/action/citation_signature","submit_replication":"https://pith.science/pith/3U5PB7E2NAT4XAK7PURWGRI4IG/action/replication_record"}},"created_at":"2026-05-20T01:05:50.722649+00:00","updated_at":"2026-05-20T01:05:50.722649+00:00"}