{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:FIU7OEFAHAHX543CQOKQJCXGJZ","short_pith_number":"pith:FIU7OEFA","schema_version":"1.0","canonical_sha256":"2a29f710a0380f7ef3628395048ae64e6ad773cc9ffc02b2b222d66e6c4fbd1d","source":{"kind":"arxiv","id":"1503.02406","version":1},"attestation_state":"computed","paper":{"title":"Deep Learning and the Information Bottleneck Principle","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Naftali Tishby, Noga Zaslavsky","submitted_at":"2015-03-09T09:39:41Z","abstract_excerpt":"Deep Neural Networks (DNNs) are analyzed via the theoretical framework of the information bottleneck (IB) principle. We first show that any DNN can be quantified by the mutual information between the layers and the input and output variables. Using this representation we can calculate the optimal information theoretic limits of the DNN and obtain finite sample generalization bounds. The advantage of getting closer to the theoretical limit is quantifiable both by the generalization bound and by the network's simplicity. We argue that both the optimal architecture, number of layers and features/"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1503.02406","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2015-03-09T09:39:41Z","cross_cats_sorted":[],"title_canon_sha256":"1b4eff5e34d396f2f19db12cd049ef0c1cdb8bede493ded9a86a6f16b92d75c5","abstract_canon_sha256":"1530f184c19b55e13aaf88196d07f6d49947e8d33ef1650985363a293f96fa32"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:25:25.203480Z","signature_b64":"2Ftw1IKydveNxr2IvgmetssKRRxxQ1va88XS9vViAxSdiojf3va3/J05EKG4u/+dFOnWWPC4X4CsCj3JliH6Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2a29f710a0380f7ef3628395048ae64e6ad773cc9ffc02b2b222d66e6c4fbd1d","last_reissued_at":"2026-05-18T02:25:25.203123Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:25:25.203123Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep Learning and the Information Bottleneck Principle","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Naftali Tishby, Noga Zaslavsky","submitted_at":"2015-03-09T09:39:41Z","abstract_excerpt":"Deep Neural Networks (DNNs) are analyzed via the theoretical framework of the information bottleneck (IB) principle. We first show that any DNN can be quantified by the mutual information between the layers and the input and output variables. Using this representation we can calculate the optimal information theoretic limits of the DNN and obtain finite sample generalization bounds. The advantage of getting closer to the theoretical limit is quantifiable both by the generalization bound and by the network's simplicity. We argue that both the optimal architecture, number of layers and features/"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1503.02406","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1503.02406","created_at":"2026-05-18T02:25:25.203177+00:00"},{"alias_kind":"arxiv_version","alias_value":"1503.02406v1","created_at":"2026-05-18T02:25:25.203177+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1503.02406","created_at":"2026-05-18T02:25:25.203177+00:00"},{"alias_kind":"pith_short_12","alias_value":"FIU7OEFAHAHX","created_at":"2026-05-18T12:29:19.899920+00:00"},{"alias_kind":"pith_short_16","alias_value":"FIU7OEFAHAHX543C","created_at":"2026-05-18T12:29:19.899920+00:00"},{"alias_kind":"pith_short_8","alias_value":"FIU7OEFA","created_at":"2026-05-18T12:29:19.899920+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2603.15842","citing_title":"Informationally Compressive Anonymization: Non-Degrading Sensitive Input Protection for Privacy-Preserving Supervised Machine Learning","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18603","citing_title":"Starve to Perceive: Taming Lazy Perception in VLMs with Constrained Visual Bandwidth","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12536","citing_title":"Information as Maximum-Caliber Deviation: A bridge between Integrated Information Theory and the Free Energy Principle","ref_index":150,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06394","citing_title":"Lecture Notes on Statistical Physics and Neural Networks","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2604.12049","citing_title":"Leveraging Weighted Syntactic and Semantic Context Assessment Summary (wSSAS) Towards Text Categorization Using LLMs","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15547","citing_title":"Consistency Analysis of Sentiment Predictions using Syntactic & Semantic Context Assessment Summarization (SSAS)","ref_index":35,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ","json":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ.json","graph_json":"https://pith.science/api/pith-number/FIU7OEFAHAHX543CQOKQJCXGJZ/graph.json","events_json":"https://pith.science/api/pith-number/FIU7OEFAHAHX543CQOKQJCXGJZ/events.json","paper":"https://pith.science/paper/FIU7OEFA"},"agent_actions":{"view_html":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ","download_json":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ.json","view_paper":"https://pith.science/paper/FIU7OEFA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1503.02406&json=true","fetch_graph":"https://pith.science/api/pith-number/FIU7OEFAHAHX543CQOKQJCXGJZ/graph.json","fetch_events":"https://pith.science/api/pith-number/FIU7OEFAHAHX543CQOKQJCXGJZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ/action/storage_attestation","attest_author":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ/action/author_attestation","sign_citation":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ/action/citation_signature","submit_replication":"https://pith.science/pith/FIU7OEFAHAHX543CQOKQJCXGJZ/action/replication_record"}},"created_at":"2026-05-18T02:25:25.203177+00:00","updated_at":"2026-05-18T02:25:25.203177+00:00"}