{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:XR4JGHSU2QQVVMICQ52A3RJJOM","short_pith_number":"pith:XR4JGHSU","canonical_record":{"source":{"id":"2605.16617","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T20:37:49Z","cross_cats_sorted":[],"title_canon_sha256":"55ff7533e0d1ea6e01754bff217154a5b4c9af3e35c4186304d5500b3b07d9bd","abstract_canon_sha256":"7202c08d7290dacfc13424c7f0b728bb8c095152f517a436d62c3b8c1ceae93d"},"schema_version":"1.0"},"canonical_sha256":"bc78931e54d4215ab10287740dc529733a66a40194bdffc0efe3d6398b634e7c","source":{"kind":"arxiv","id":"2605.16617","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16617","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16617v1","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16617","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_12","alias_value":"XR4JGHSU2QQV","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_16","alias_value":"XR4JGHSU2QQVVMIC","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_8","alias_value":"XR4JGHSU","created_at":"2026-05-20T00:02:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:XR4JGHSU2QQVVMICQ52A3RJJOM","target":"record","payload":{"canonical_record":{"source":{"id":"2605.16617","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T20:37:49Z","cross_cats_sorted":[],"title_canon_sha256":"55ff7533e0d1ea6e01754bff217154a5b4c9af3e35c4186304d5500b3b07d9bd","abstract_canon_sha256":"7202c08d7290dacfc13424c7f0b728bb8c095152f517a436d62c3b8c1ceae93d"},"schema_version":"1.0"},"canonical_sha256":"bc78931e54d4215ab10287740dc529733a66a40194bdffc0efe3d6398b634e7c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:32.741085Z","signature_b64":"ZE8g8lTxBS3meAIs0gBx2gLImIL7Al9yAAiMCVlZtHKV/t5V5afufzMpgJ+vVG83P8XzH9loLxhWOehQWVFhAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bc78931e54d4215ab10287740dc529733a66a40194bdffc0efe3d6398b634e7c","last_reissued_at":"2026-05-20T00:02:32.740281Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:32.740281Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.16617","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PZnd6jZcyh23MT+XkWxwg+yvZHgxbOQHrszz/FSuHC7K7u3VZD0Um8RfjzzH02yVKbGNFiJEX9OI/d/FJUmkCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T13:15:47.236804Z"},"content_sha256":"cc606d26b01c1bf1b8a91b12e01294dd186d59b6a65422ebea6b5647f443f1b7","schema_version":"1.0","event_id":"sha256:cc606d26b01c1bf1b8a91b12e01294dd186d59b6a65422ebea6b5647f443f1b7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:XR4JGHSU2QQVVMICQ52A3RJJOM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Exceeding the Numerical and Performance Characteristics of IEEE-754 SGEMM with BFloat16 Tensor Cores on GPUs for Scientific Computing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads.","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Addison Richards, Cherin Joseph, Cole Brower, Dmitry Lyakh, Greg Henry, Haicheng Wu, Harun Bayraktar, Jack Kosaian, John Gunnels, Lukas Mosimann, Paul Springer, Victor Podlozhnyuk","submitted_at":"2026-05-15T20:37:49Z","abstract_excerpt":"Largely due to their increased native capacity for numerical intensity and power efficiency, reduced-precision floating-point computing resources, primarily used in artificial intelligence (AI) applications, have expanded at a greater rate than their higher-precision relatives. This has led to various efforts focused upon leveraging plentiful reduced-precision hardware to mimic higher-precision mathematical calculations. This paper studies a specific use case, namely the use of bfloat16 (BF16) Tensor Cores found on modern GPUs in service of single precision (FP32) matrix multiply operations. G"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"This paper examines the performance, efficiency, power, and numerical characteristics of FP32 matrix multiplication via BF16-based emulation and demonstrates how it exceeds numerical and performance characteristics of native FP32 for scientific applications.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The assumption that BF16 tensor core operations accumulated into FP32 accumulators, combined with Blackwell-specific scaling hardware, produce results that are both faster and numerically superior to native IEEE-754 FP32 SGEMM across relevant scientific workloads without hidden accuracy losses from rounding or denormal handling.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"BF16 tensor cores on GPUs emulate FP32 SGEMM with superior performance, power efficiency, and numerical accuracy compared to native FP32, including a library implementation that handles denormals.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4b0c5aa1ab44aa3988bb6edb84423819cee71a01ebf6e91070ab8c1d49841917"},"source":{"id":"2605.16617","kind":"arxiv","version":1},"verdict":{"id":"7b7b5b51-ed7b-48b6-9299-46cabc574d55","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T21:16:52.062290Z","strongest_claim":"This paper examines the performance, efficiency, power, and numerical characteristics of FP32 matrix multiplication via BF16-based emulation and demonstrates how it exceeds numerical and performance characteristics of native FP32 for scientific applications.","one_line_summary":"BF16 tensor cores on GPUs emulate FP32 SGEMM with superior performance, power efficiency, and numerical accuracy compared to native FP32, including a library implementation that handles denormals.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The assumption that BF16 tensor core operations accumulated into FP32 accumulators, combined with Blackwell-specific scaling hardware, produce results that are both faster and numerically superior to native IEEE-754 FP32 SGEMM across relevant scientific workloads without hidden accuracy losses from rounding or denormal handling.","pith_extraction_headline":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16617/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T21:31:19.420862Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T21:21:31.117171Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T19:21:56.778112Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.590145Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"3e7dfb10ef7e19acf23dea2d09f79923179d5299262afb6199476bc170e0d0f0"},"references":{"count":27,"sample":[{"doi":"10.1038/s41586-","year":2019,"title":"Machine behaviour","work_id":"3fd8bfa9-ef44-41fd-bfdb-b38b1bde60ef","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2009,"title":"Baboulin, M., Buttari, A., Dongarra, J., Kurzak, J., Langou, J., Langou, J., Luszczek, P., and Tomov, S.Accelerating scientific computations with mixed precision algorithms.Computer Physics Communicat","work_id":"5a218db9-ad8a-43ab-b276-70b5663e8bf2","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Bayraktar, H., Charara, A., Clark, D., Cohen, S., Costa, T., Fang, Y.- L. L., Gao, Y., Guan, J., Gunnels, J., Haidar, A., Hehn, A., Hohnerbach, M., Jones, M., Lubowe, T., Lyakh, D., Morino, S., Spring","work_id":"ac2ba8ea-b5f2-414b-8b45-fe71c2ba5437","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1021/ct400250u","year":2013,"title":"E., and Sherrill, C","work_id":"65cc731c-60f5-49f6-a14c-c1416440f5a1","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Dongarra, J. J., Croz, J. D., Hammarling, S., and Hanson, R. J.Algo- rithm 656: An extended set of Fortran basic linear algebra subprograms: Model implementation and test programs. 18–32","work_id":"8a59ecfc-69da-418d-9e7f-41798eba2e98","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":27,"snapshot_sha256":"5507112fbf9a39189146a088098ac6239e1948b6150d54d3e8dc2e5eafc4bf34","internal_anchors":3},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7dbdb38ae6bdf77d134b2b33d538c7d98a855cb4c1aa3f45dfa22cb194eb9cad"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"7b7b5b51-ed7b-48b6-9299-46cabc574d55"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3N5utg1qZWvsk5H9sabcm/k803sVDnDJEpbpAGzYNNrNs5OM3dKP7xXOItnIOIoVsjA45KN26AJN6m7e8cM/AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T13:15:47.237565Z"},"content_sha256":"d42c86e75b04c74d703174669ce29f3647116b15db4f5e19b15331bb88656ec3","schema_version":"1.0","event_id":"sha256:d42c86e75b04c74d703174669ce29f3647116b15db4f5e19b15331bb88656ec3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/bundle.json","state_url":"https://pith.science/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T13:15:47Z","links":{"resolver":"https://pith.science/pith/XR4JGHSU2QQVVMICQ52A3RJJOM","bundle":"https://pith.science/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/bundle.json","state":"https://pith.science/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XR4JGHSU2QQVVMICQ52A3RJJOM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XR4JGHSU2QQVVMICQ52A3RJJOM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7202c08d7290dacfc13424c7f0b728bb8c095152f517a436d62c3b8c1ceae93d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T20:37:49Z","title_canon_sha256":"55ff7533e0d1ea6e01754bff217154a5b4c9af3e35c4186304d5500b3b07d9bd"},"schema_version":"1.0","source":{"id":"2605.16617","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16617","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16617v1","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16617","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_12","alias_value":"XR4JGHSU2QQV","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_16","alias_value":"XR4JGHSU2QQVVMIC","created_at":"2026-05-20T00:02:32Z"},{"alias_kind":"pith_short_8","alias_value":"XR4JGHSU","created_at":"2026-05-20T00:02:32Z"}],"graph_snapshots":[{"event_id":"sha256:d42c86e75b04c74d703174669ce29f3647116b15db4f5e19b15331bb88656ec3","target":"graph","created_at":"2026-05-20T00:02:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"This paper examines the performance, efficiency, power, and numerical characteristics of FP32 matrix multiplication via BF16-based emulation and demonstrates how it exceeds numerical and performance characteristics of native FP32 for scientific applications."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that BF16 tensor core operations accumulated into FP32 accumulators, combined with Blackwell-specific scaling hardware, produce results that are both faster and numerically superior to native IEEE-754 FP32 SGEMM across relevant scientific workloads without hidden accuracy losses from rounding or denormal handling."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"BF16 tensor cores on GPUs emulate FP32 SGEMM with superior performance, power efficiency, and numerical accuracy compared to native FP32, including a library implementation that handles denormals."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads."}],"snapshot_sha256":"4b0c5aa1ab44aa3988bb6edb84423819cee71a01ebf6e91070ab8c1d49841917"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7dbdb38ae6bdf77d134b2b33d538c7d98a855cb4c1aa3f45dfa22cb194eb9cad"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T21:31:19.420862Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T21:21:31.117171Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T19:21:56.778112Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.590145Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.16617/integrity.json","findings":[],"snapshot_sha256":"3e7dfb10ef7e19acf23dea2d09f79923179d5299262afb6199476bc170e0d0f0","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Largely due to their increased native capacity for numerical intensity and power efficiency, reduced-precision floating-point computing resources, primarily used in artificial intelligence (AI) applications, have expanded at a greater rate than their higher-precision relatives. This has led to various efforts focused upon leveraging plentiful reduced-precision hardware to mimic higher-precision mathematical calculations. This paper studies a specific use case, namely the use of bfloat16 (BF16) Tensor Cores found on modern GPUs in service of single precision (FP32) matrix multiply operations. G","authors_text":"Addison Richards, Cherin Joseph, Cole Brower, Dmitry Lyakh, Greg Henry, Haicheng Wu, Harun Bayraktar, Jack Kosaian, John Gunnels, Lukas Mosimann, Paul Springer, Victor Podlozhnyuk","cross_cats":[],"headline":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T20:37:49Z","title":"Exceeding the Numerical and Performance Characteristics of IEEE-754 SGEMM with BFloat16 Tensor Cores on GPUs for Scientific Computing"},"references":{"count":27,"internal_anchors":3,"resolved_work":27,"sample":[{"cited_arxiv_id":"","doi":"10.1038/s41586-","is_internal_anchor":false,"ref_index":1,"title":"Machine behaviour","work_id":"3fd8bfa9-ef44-41fd-bfdb-b38b1bde60ef","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Baboulin, M., Buttari, A., Dongarra, J., Kurzak, J., Langou, J., Langou, J., Luszczek, P., and Tomov, S.Accelerating scientific computations with mixed precision algorithms.Computer Physics Communicat","work_id":"5a218db9-ad8a-43ab-b276-70b5663e8bf2","year":2009},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Bayraktar, H., Charara, A., Clark, D., Cohen, S., Costa, T., Fang, Y.- L. L., Gao, Y., Guan, J., Gunnels, J., Haidar, A., Hehn, A., Hohnerbach, M., Jones, M., Lubowe, T., Lyakh, D., Morino, S., Spring","work_id":"ac2ba8ea-b5f2-414b-8b45-fe71c2ba5437","year":2023},{"cited_arxiv_id":"","doi":"10.1021/ct400250u","is_internal_anchor":false,"ref_index":4,"title":"E., and Sherrill, C","work_id":"65cc731c-60f5-49f6-a14c-c1416440f5a1","year":2013},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Dongarra, J. J., Croz, J. D., Hammarling, S., and Hanson, R. J.Algo- rithm 656: An extended set of Fortran basic linear algebra subprograms: Model implementation and test programs. 18–32","work_id":"8a59ecfc-69da-418d-9e7f-41798eba2e98","year":null}],"snapshot_sha256":"5507112fbf9a39189146a088098ac6239e1948b6150d54d3e8dc2e5eafc4bf34"},"source":{"id":"2605.16617","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T21:16:52.062290Z","id":"7b7b5b51-ed7b-48b6-9299-46cabc574d55","model_set":{"reader":"grok-4.3"},"one_line_summary":"BF16 tensor cores on GPUs emulate FP32 SGEMM with superior performance, power efficiency, and numerical accuracy compared to native FP32, including a library implementation that handles denormals.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Using BFloat16 tensor cores with FP32 accumulation on GPUs exceeds the speed and numerical accuracy of native IEEE-754 FP32 SGEMM for scientific workloads.","strongest_claim":"This paper examines the performance, efficiency, power, and numerical characteristics of FP32 matrix multiplication via BF16-based emulation and demonstrates how it exceeds numerical and performance characteristics of native FP32 for scientific applications.","weakest_assumption":"The assumption that BF16 tensor core operations accumulated into FP32 accumulators, combined with Blackwell-specific scaling hardware, produce results that are both faster and numerically superior to native IEEE-754 FP32 SGEMM across relevant scientific workloads without hidden accuracy losses from rounding or denormal handling."}},"verdict_id":"7b7b5b51-ed7b-48b6-9299-46cabc574d55"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cc606d26b01c1bf1b8a91b12e01294dd186d59b6a65422ebea6b5647f443f1b7","target":"record","created_at":"2026-05-20T00:02:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7202c08d7290dacfc13424c7f0b728bb8c095152f517a436d62c3b8c1ceae93d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2026-05-15T20:37:49Z","title_canon_sha256":"55ff7533e0d1ea6e01754bff217154a5b4c9af3e35c4186304d5500b3b07d9bd"},"schema_version":"1.0","source":{"id":"2605.16617","kind":"arxiv","version":1}},"canonical_sha256":"bc78931e54d4215ab10287740dc529733a66a40194bdffc0efe3d6398b634e7c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bc78931e54d4215ab10287740dc529733a66a40194bdffc0efe3d6398b634e7c","first_computed_at":"2026-05-20T00:02:32.740281Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:32.740281Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ZE8g8lTxBS3meAIs0gBx2gLImIL7Al9yAAiMCVlZtHKV/t5V5afufzMpgJ+vVG83P8XzH9loLxhWOehQWVFhAw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:32.741085Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.16617","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cc606d26b01c1bf1b8a91b12e01294dd186d59b6a65422ebea6b5647f443f1b7","sha256:d42c86e75b04c74d703174669ce29f3647116b15db4f5e19b15331bb88656ec3"],"state_sha256":"1730015df789917ac4cc36b2da76c74b8a2ed4a6f0ebb8d49b2c0c4449feca18"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4twSGrZWIWJDqRClYvxUanDstLlyRxpmzIyeK7s2Zj8ZSVI2NvJ6i/rU2zH57tmJSwyu7ie1UeXbnqMGfT9PBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T13:15:47.240323Z","bundle_sha256":"def355977ff228745b3705020e80e9472a5bb33d473342befb674b7380150c05"}}