{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:OPGITI7GAGBIQOQLKQMQCUAFNV","short_pith_number":"pith:OPGITI7G","schema_version":"1.0","canonical_sha256":"73cc89a3e60182883a0b54190150056d4494899183538caab0349b41cb0f1cfa","source":{"kind":"arxiv","id":"1606.08061","version":1},"attestation_state":"computed","paper":{"title":"Exact gradient updates in time independent of output size for the spherical loss family","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Alexandre de Br\\'ebisson, Pascal Vincent, Xavier Bouthillier","submitted_at":"2016-06-26T17:57:36Z","abstract_excerpt":"An important class of problems involves training deep neural networks with sparse prediction targets of very high dimension D. These occur naturally in e.g. neural language models or the learning of word-embeddings, often posed as predicting the probability of next words among a vocabulary of size D (e.g. 200,000). Computing the equally large, but typically non-sparse D-dimensional output vector from a last hidden layer of reasonable dimension d (e.g. 500) incurs a prohibitive O(Dd) computational cost for each example, as does updating the $D \\times d$ output weight matrix and computing the gr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1606.08061","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2016-06-26T17:57:36Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"5ed0667d037ccb356ab0b310e47ca63c8a1936e4ef4cbea812b00c4c49155b01","abstract_canon_sha256":"fed1d4f4cf55e5f1d1931dc6cc2737dc91a3bdded2f0deed6f9332a49ede9529"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:11:53.927871Z","signature_b64":"cfbyGCxa7Lr24xTtC5s6pjlDN4Ry+5Urvj1at4QHvZb/xgmcNCKDaCazX4TXuWRJ6TkG0SMXdBOD2RCRqEiaDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"73cc89a3e60182883a0b54190150056d4494899183538caab0349b41cb0f1cfa","last_reissued_at":"2026-05-18T01:11:53.927513Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:11:53.927513Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Exact gradient updates in time independent of output size for the spherical loss family","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.NE","authors_text":"Alexandre de Br\\'ebisson, Pascal Vincent, Xavier Bouthillier","submitted_at":"2016-06-26T17:57:36Z","abstract_excerpt":"An important class of problems involves training deep neural networks with sparse prediction targets of very high dimension D. These occur naturally in e.g. neural language models or the learning of word-embeddings, often posed as predicting the probability of next words among a vocabulary of size D (e.g. 200,000). Computing the equally large, but typically non-sparse D-dimensional output vector from a last hidden layer of reasonable dimension d (e.g. 500) incurs a prohibitive O(Dd) computational cost for each example, as does updating the $D \\times d$ output weight matrix and computing the gr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.08061","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1606.08061","created_at":"2026-05-18T01:11:53.927569+00:00"},{"alias_kind":"arxiv_version","alias_value":"1606.08061v1","created_at":"2026-05-18T01:11:53.927569+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.08061","created_at":"2026-05-18T01:11:53.927569+00:00"},{"alias_kind":"pith_short_12","alias_value":"OPGITI7GAGBI","created_at":"2026-05-18T12:30:36.002864+00:00"},{"alias_kind":"pith_short_16","alias_value":"OPGITI7GAGBIQOQL","created_at":"2026-05-18T12:30:36.002864+00:00"},{"alias_kind":"pith_short_8","alias_value":"OPGITI7G","created_at":"2026-05-18T12:30:36.002864+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV","json":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV.json","graph_json":"https://pith.science/api/pith-number/OPGITI7GAGBIQOQLKQMQCUAFNV/graph.json","events_json":"https://pith.science/api/pith-number/OPGITI7GAGBIQOQLKQMQCUAFNV/events.json","paper":"https://pith.science/paper/OPGITI7G"},"agent_actions":{"view_html":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV","download_json":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV.json","view_paper":"https://pith.science/paper/OPGITI7G","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1606.08061&json=true","fetch_graph":"https://pith.science/api/pith-number/OPGITI7GAGBIQOQLKQMQCUAFNV/graph.json","fetch_events":"https://pith.science/api/pith-number/OPGITI7GAGBIQOQLKQMQCUAFNV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV/action/storage_attestation","attest_author":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV/action/author_attestation","sign_citation":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV/action/citation_signature","submit_replication":"https://pith.science/pith/OPGITI7GAGBIQOQLKQMQCUAFNV/action/replication_record"}},"created_at":"2026-05-18T01:11:53.927569+00:00","updated_at":"2026-05-18T01:11:53.927569+00:00"}