{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:X2XC2RBRMDN52VK3PKH6BPCQBQ","short_pith_number":"pith:X2XC2RBR","schema_version":"1.0","canonical_sha256":"beae2d443160dbdd555b7a8fe0bc500c30d7835e2acc296d2e4dc4fdfab4c732","source":{"kind":"arxiv","id":"1606.06160","version":3},"attestation_state":"computed","paper":{"title":"DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"He Wen, Shuchang Zhou, Xinyu Zhou, Yuheng Zou, Yuxin Wu, Zekun Ni","submitted_at":"2016-06-20T15:02:31Z","abstract_excerpt":"We propose DoReFa-Net, a method to train convolutional neural networks that have low bitwidth weights and activations using low bitwidth parameter gradients. In particular, during backward pass, parameter gradients are stochastically quantized to low bitwidth numbers before being propagated to convolutional layers. As convolutions during forward/backward passes can now operate on low bitwidth weights and activations/gradients respectively, DoReFa-Net can use bit convolution kernels to accelerate both training and inference. Moreover, as bit convolutions can be efficiently implemented on CPU, F"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1606.06160","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2016-06-20T15:02:31Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"f5c5654d85aae4aeed5ac597cebe9538f2eb9f0d9ac4260b3eda570dac417a2b","abstract_canon_sha256":"afb5cd0d291695dd979479df901d570d131776ba3455fc85ae2d3de11168c3f3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:24:35.593192Z","signature_b64":"Y5QG2ZoGWsmzQ4uhWb1ibNm6ACl4ga5yPViIYf9QcCsp+QIaZIvZuNdlVgugGtZZJSRFD89BZ7dtUIlU12WDAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"beae2d443160dbdd555b7a8fe0bc500c30d7835e2acc296d2e4dc4fdfab4c732","last_reissued_at":"2026-05-18T00:24:35.592739Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:24:35.592739Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"He Wen, Shuchang Zhou, Xinyu Zhou, Yuheng Zou, Yuxin Wu, Zekun Ni","submitted_at":"2016-06-20T15:02:31Z","abstract_excerpt":"We propose DoReFa-Net, a method to train convolutional neural networks that have low bitwidth weights and activations using low bitwidth parameter gradients. In particular, during backward pass, parameter gradients are stochastically quantized to low bitwidth numbers before being propagated to convolutional layers. As convolutions during forward/backward passes can now operate on low bitwidth weights and activations/gradients respectively, DoReFa-Net can use bit convolution kernels to accelerate both training and inference. Moreover, as bit convolutions can be efficiently implemented on CPU, F"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.06160","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1606.06160","created_at":"2026-05-18T00:24:35.592801+00:00"},{"alias_kind":"arxiv_version","alias_value":"1606.06160v3","created_at":"2026-05-18T00:24:35.592801+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.06160","created_at":"2026-05-18T00:24:35.592801+00:00"},{"alias_kind":"pith_short_12","alias_value":"X2XC2RBRMDN5","created_at":"2026-05-18T12:30:51.357362+00:00"},{"alias_kind":"pith_short_16","alias_value":"X2XC2RBRMDN52VK3","created_at":"2026-05-18T12:30:51.357362+00:00"},{"alias_kind":"pith_short_8","alias_value":"X2XC2RBR","created_at":"2026-05-18T12:30:51.357362+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":17,"internal_anchor_count":8,"sample":[{"citing_arxiv_id":"1907.00593","citing_title":"Weight Normalization based Quantization for Deep Neural Network Compression","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2408.00923","citing_title":"Reclaiming Residual Knowledge: A Novel Paradigm to Low-Bit Quantization","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22351","citing_title":"QuantSR+: Pushing the Limit of Quantized Image Super-Resolution Networks","ref_index":65,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20289","citing_title":"Plug-and-Play Spiking Operators: Breaking the Nonlinearity Bottleneck in Spiking Transformers","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21171","citing_title":"FTerViT: Fully Ternary Vision Transformer","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10989","citing_title":"SURGE: Surrogate Gradient Adaptation in Binary Neural Networks","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2509.03472","citing_title":"DPQuant: Efficient and Differentially-Private Model Training via Dynamic Quantization Scheduling","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"2209.05433","citing_title":"FP8 Formats for Deep Learning","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04988","citing_title":"Prune-Quantize-Distill: An Ordered Pipeline for Efficient Neural Network Compression","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10989","citing_title":"SURGE: Surrogate Gradient Adaptation in Binary Neural Networks","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03396","citing_title":"Design and Implementation of BNN-Based Object Detection on FPGA","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"1710.03740","citing_title":"Mixed Precision Training","ref_index":34,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03396","citing_title":"Design and Implementation of BNN-Based Object Detection on FPGA","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26979","citing_title":"Multibit neural inference in a N-ary crossbar architecture","ref_index":18,"is_internal_anchor":false},{"citing_arxiv_id":"2604.25903","citing_title":"Carbon-Taxed Transformers: A Green Compression Pipeline for Overgrown Language Models","ref_index":73,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05994","citing_title":"DiBA: Diagonal and Binary Matrix Approximation for Neural Network Weight Compression","ref_index":16,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10861","citing_title":"Training single-electron and single-photon stochastic physical neural networks","ref_index":39,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ","json":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ.json","graph_json":"https://pith.science/api/pith-number/X2XC2RBRMDN52VK3PKH6BPCQBQ/graph.json","events_json":"https://pith.science/api/pith-number/X2XC2RBRMDN52VK3PKH6BPCQBQ/events.json","paper":"https://pith.science/paper/X2XC2RBR"},"agent_actions":{"view_html":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ","download_json":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ.json","view_paper":"https://pith.science/paper/X2XC2RBR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1606.06160&json=true","fetch_graph":"https://pith.science/api/pith-number/X2XC2RBRMDN52VK3PKH6BPCQBQ/graph.json","fetch_events":"https://pith.science/api/pith-number/X2XC2RBRMDN52VK3PKH6BPCQBQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ/action/storage_attestation","attest_author":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ/action/author_attestation","sign_citation":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ/action/citation_signature","submit_replication":"https://pith.science/pith/X2XC2RBRMDN52VK3PKH6BPCQBQ/action/replication_record"}},"created_at":"2026-05-18T00:24:35.592801+00:00","updated_at":"2026-05-18T00:24:35.592801+00:00"}