{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:4KUMRT7YUGMM6QKYZ4OOHJH22R","short_pith_number":"pith:4KUMRT7Y","schema_version":"1.0","canonical_sha256":"e2a8c8cff8a198cf4158cf1ce3a4fad449db8c9023ccb744fe6af2df44fc5082","source":{"kind":"arxiv","id":"1611.00712","version":3},"attestation_state":"computed","paper":{"title":"The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Andriy Mnih, Chris J. Maddison, Yee Whye Teh","submitted_at":"2016-11-02T18:25:40Z","abstract_excerpt":"The reparameterization trick enables optimizing large scale stochastic computation graphs via gradient descent. The essence of the trick is to refactor each stochastic node into a differentiable function of its parameters and a random variable with fixed distribution. After refactoring, the gradients of the loss propagated by the chain rule through the graph are low variance unbiased estimators of the gradients of the expected loss. While many continuous random variables have such reparameterizations, discrete random variables lack useful reparameterizations due to the discontinuous nature of "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.00712","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-02T18:25:40Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"5e962b24334319b430e9dc7184dbf6a917054cccf57d348e752263bfa9a290ec","abstract_canon_sha256":"a71e2286c2ffcf1234f45f4cb802d40ef50c261165e539ab14e754fb9ed5a955"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:49:31.864890Z","signature_b64":"omAd7+3qdKQvvawCL6qToIMoyjrzMS3kDfHZN3nWoKW17mmqVsbVw1IAlxX4QoAThtUEGHxRNqumpf3Uiel6BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e2a8c8cff8a198cf4158cf1ce3a4fad449db8c9023ccb744fe6af2df44fc5082","last_reissued_at":"2026-05-18T00:49:31.864050Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:49:31.864050Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Andriy Mnih, Chris J. Maddison, Yee Whye Teh","submitted_at":"2016-11-02T18:25:40Z","abstract_excerpt":"The reparameterization trick enables optimizing large scale stochastic computation graphs via gradient descent. The essence of the trick is to refactor each stochastic node into a differentiable function of its parameters and a random variable with fixed distribution. After refactoring, the gradients of the loss propagated by the chain rule through the graph are low variance unbiased estimators of the gradients of the expected loss. While many continuous random variables have such reparameterizations, discrete random variables lack useful reparameterizations due to the discontinuous nature of "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.00712","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.00712","created_at":"2026-05-18T00:49:31.864198+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.00712v3","created_at":"2026-05-18T00:49:31.864198+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.00712","created_at":"2026-05-18T00:49:31.864198+00:00"},{"alias_kind":"pith_short_12","alias_value":"4KUMRT7YUGMM","created_at":"2026-05-18T12:29:58.707656+00:00"},{"alias_kind":"pith_short_16","alias_value":"4KUMRT7YUGMM6QKY","created_at":"2026-05-18T12:29:58.707656+00:00"},{"alias_kind":"pith_short_8","alias_value":"4KUMRT7Y","created_at":"2026-05-18T12:29:58.707656+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":27,"internal_anchor_count":15,"sample":[{"citing_arxiv_id":"1906.12087","citing_title":"ARMIN: Towards a More Efficient and Light-weight Recurrent Memory Network","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"1907.00664","citing_title":"Learning World Graphs to Accelerate Hierarchical Reinforcement Learning","ref_index":62,"is_internal_anchor":true},{"citing_arxiv_id":"2406.09250","citing_title":"MirrorCheck: Efficient Adversarial Defense for Vision-Language Models","ref_index":59,"is_internal_anchor":true},{"citing_arxiv_id":"2006.12024","citing_title":"Bayesian Neural Networks: An Introduction and Survey","ref_index":97,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17568","citing_title":"Structured Neural Marked Point Processes for Interpretable Event Interaction Modeling","ref_index":59,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17568","citing_title":"Structured Neural Marked Point Processes for Interpretable Event Interaction Modeling","ref_index":59,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20088","citing_title":"INSHAPE: Instance-Level Shapelets for Interpretable Time-Series Classification","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18204","citing_title":"Forward-Learned Discrete Diffusion: Learning how to noise to denoise faster","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18556","citing_title":"GSQ: Highly-Accurate Low-Precision Scalar Quantization for LLMs via Gumbel-Softmax Sampling","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2511.12340","citing_title":"LILogic Net: Compact Logic Gate Networks with Learnable Connectivity for Efficient Hardware Deployment","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2602.08880","citing_title":"Differentiable Logical Programming for Quantum Circuit Discovery and Optimization","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2602.15451","citing_title":"Molecular Design beyond Training Data with Novel Extended Objective Functionals of Generative AI Models Driven by Quantum Annealing Computer","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2603.10225","citing_title":"Rethinking the Harmonic Loss via Non-Euclidean Distance Layers","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2603.15250","citing_title":"In-Context Symbolic Regression for Robustness-Improved Kolmogorov-Arnold Networks","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14297","citing_title":"Policy Optimization in Hybrid Discrete-Continuous Action Spaces via Mixed Gradients","ref_index":95,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10292","citing_title":"LeapTS: Rethinking Time Series Forecasting as Adaptive Multi-Horizon Scheduling","ref_index":113,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00445","citing_title":"The Power of Order: Fooling LLMs with Adversarial Table Permutations","ref_index":31,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26181","citing_title":"SWAN: World-Aware Adaptive Multimodal Networks for Runtime Variations","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05769","citing_title":"Adaptive Selection of LoRA Components in Privacy-Preserving Federated Learning","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05096","citing_title":"CapsID: Soft-Routed Variable-Length Semantic IDs for Generative Recommendation","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01226","citing_title":"Arbitrarily Conditioned Hierarchical Flows for Spatiotemporal Events","ref_index":45,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00670","citing_title":"Robust Multimodal Recommendation via Graph Retrieval-Enhanced Modality Completion","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00445","citing_title":"The Power of Order: Fooling LLMs with Adversarial Table Permutations","ref_index":31,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10994","citing_title":"LumiMotion: Improving Gaussian Relighting with Scene Dynamics","ref_index":32,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09955","citing_title":"Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation","ref_index":18,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R","json":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R.json","graph_json":"https://pith.science/api/pith-number/4KUMRT7YUGMM6QKYZ4OOHJH22R/graph.json","events_json":"https://pith.science/api/pith-number/4KUMRT7YUGMM6QKYZ4OOHJH22R/events.json","paper":"https://pith.science/paper/4KUMRT7Y"},"agent_actions":{"view_html":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R","download_json":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R.json","view_paper":"https://pith.science/paper/4KUMRT7Y","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.00712&json=true","fetch_graph":"https://pith.science/api/pith-number/4KUMRT7YUGMM6QKYZ4OOHJH22R/graph.json","fetch_events":"https://pith.science/api/pith-number/4KUMRT7YUGMM6QKYZ4OOHJH22R/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R/action/storage_attestation","attest_author":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R/action/author_attestation","sign_citation":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R/action/citation_signature","submit_replication":"https://pith.science/pith/4KUMRT7YUGMM6QKYZ4OOHJH22R/action/replication_record"}},"created_at":"2026-05-18T00:49:31.864198+00:00","updated_at":"2026-05-18T00:49:31.864198+00:00"}