{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:6TWABMQUXQCLL4QVU5XA24WAUG","short_pith_number":"pith:6TWABMQU","schema_version":"1.0","canonical_sha256":"f4ec00b214bc04b5f215a76e0d72c0a1bbcc29bd75e44ae79724f20e1f738c3a","source":{"kind":"arxiv","id":"1406.7362","version":1},"attestation_state":"computed","paper":{"title":"Exponentially Increasing the Capacity-to-Computation Ratio for Conditional Computation in Deep Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"stat.ML","authors_text":"Kyunghyun Cho, Yoshua Bengio","submitted_at":"2014-06-28T06:45:51Z","abstract_excerpt":"Many state-of-the-art results obtained with deep networks are achieved with the largest models that could be trained, and if more computation power was available, we might be able to exploit much larger datasets in order to improve generalization ability. Whereas in learning algorithms such as decision trees the ratio of capacity (e.g., the number of parameters) to computation is very favorable (up to exponentially more parameters than computation), the ratio is essentially 1 for deep neural networks. Conditional computation has been proposed as a way to increase the capacity of a deep neural "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1406.7362","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2014-06-28T06:45:51Z","cross_cats_sorted":["cs.LG","cs.NE"],"title_canon_sha256":"b2a893f292253623b8bd2f9bdc030672a922fd18eae97bc84db0a89082cbed0c","abstract_canon_sha256":"c9e774937f1f8cd84f8e1c35f52ff4dfb0a2fa98e120d260fe7f12d7c7ebbba6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:48:45.553497Z","signature_b64":"xe2z4gXs/OAKjTQ22cEgazINX2V/6v/QMPfQpEcWIbYx1222A94H4jbP2rECrm38Hn8HJxx36mSMjaQ+au6yAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f4ec00b214bc04b5f215a76e0d72c0a1bbcc29bd75e44ae79724f20e1f738c3a","last_reissued_at":"2026-05-18T02:48:45.552793Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:48:45.552793Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Exponentially Increasing the Capacity-to-Computation Ratio for Conditional Computation in Deep Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.NE"],"primary_cat":"stat.ML","authors_text":"Kyunghyun Cho, Yoshua Bengio","submitted_at":"2014-06-28T06:45:51Z","abstract_excerpt":"Many state-of-the-art results obtained with deep networks are achieved with the largest models that could be trained, and if more computation power was available, we might be able to exploit much larger datasets in order to improve generalization ability. Whereas in learning algorithms such as decision trees the ratio of capacity (e.g., the number of parameters) to computation is very favorable (up to exponentially more parameters than computation), the ratio is essentially 1 for deep neural networks. Conditional computation has been proposed as a way to increase the capacity of a deep neural "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1406.7362","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1406.7362","created_at":"2026-05-18T02:48:45.552902+00:00"},{"alias_kind":"arxiv_version","alias_value":"1406.7362v1","created_at":"2026-05-18T02:48:45.552902+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1406.7362","created_at":"2026-05-18T02:48:45.552902+00:00"},{"alias_kind":"pith_short_12","alias_value":"6TWABMQUXQCL","created_at":"2026-05-18T12:28:16.859392+00:00"},{"alias_kind":"pith_short_16","alias_value":"6TWABMQUXQCLL4QV","created_at":"2026-05-18T12:28:16.859392+00:00"},{"alias_kind":"pith_short_8","alias_value":"6TWABMQU","created_at":"2026-05-18T12:28:16.859392+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2101.03961","citing_title":"Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2202.08906","citing_title":"ST-MoE: Designing Stable and Transferable Sparse Expert Models","ref_index":65,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19344","citing_title":"Quadruped Parkour Learning: Sparsely Gated Mixture of Experts with Visual Input","ref_index":35,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG","json":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG.json","graph_json":"https://pith.science/api/pith-number/6TWABMQUXQCLL4QVU5XA24WAUG/graph.json","events_json":"https://pith.science/api/pith-number/6TWABMQUXQCLL4QVU5XA24WAUG/events.json","paper":"https://pith.science/paper/6TWABMQU"},"agent_actions":{"view_html":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG","download_json":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG.json","view_paper":"https://pith.science/paper/6TWABMQU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1406.7362&json=true","fetch_graph":"https://pith.science/api/pith-number/6TWABMQUXQCLL4QVU5XA24WAUG/graph.json","fetch_events":"https://pith.science/api/pith-number/6TWABMQUXQCLL4QVU5XA24WAUG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG/action/storage_attestation","attest_author":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG/action/author_attestation","sign_citation":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG/action/citation_signature","submit_replication":"https://pith.science/pith/6TWABMQUXQCLL4QVU5XA24WAUG/action/replication_record"}},"created_at":"2026-05-18T02:48:45.552902+00:00","updated_at":"2026-05-18T02:48:45.552902+00:00"}