{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:OD6773CYL5SC5OAUJPE4TOKFL2","short_pith_number":"pith:OD6773CY","schema_version":"1.0","canonical_sha256":"70fdffec585f642eb8144bc9c9b9455e880d21d4bbdde5d5c968bd0395e3b9e2","source":{"kind":"arxiv","id":"1807.07187","version":1},"attestation_state":"computed","paper":{"title":"Efficient Training on Very Large Corpora via Gramian Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"stat.ML","authors_text":"Ed Chi, John Anderson, Lichan Hong, Li Zhang, Nicolas Mayoraz, Steffen Rendle, Walid Krichene, Xinyang Yi","submitted_at":"2018-07-18T23:45:33Z","abstract_excerpt":"We study the problem of learning similarity functions over very large corpora using neural network embedding models. These models are typically trained using SGD with sampling of random observed and unobserved pairs, with a number of samples that grows quadratically with the corpus size, making it expensive to scale to very large corpora. We propose new efficient methods to train these models without having to sample unobserved pairs. Inspired by matrix factorization, our approach relies on adding a global quadratic penalty to all pairs of examples and expressing this term as the matrix-inner-"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1807.07187","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2018-07-18T23:45:33Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"cdc2e67dc9f2a9a6daac013479e63fe7694e754575a4174892c924b80f56b962","abstract_canon_sha256":"e89821c3c493c3489d5cb2f85681ea0f37f92e594e6f1ee8ba11ed055882b271"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:10:20.974757Z","signature_b64":"ris4/pjeyv7PN0f4lWud7+BPxfVeYLc/qdlZH2849NIHo6Phsq9USlfizUKoQSlndw1jcNDxUf+41L1VpdxJDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"70fdffec585f642eb8144bc9c9b9455e880d21d4bbdde5d5c968bd0395e3b9e2","last_reissued_at":"2026-05-18T00:10:20.974166Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:10:20.974166Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Efficient Training on Very Large Corpora via Gramian Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"stat.ML","authors_text":"Ed Chi, John Anderson, Lichan Hong, Li Zhang, Nicolas Mayoraz, Steffen Rendle, Walid Krichene, Xinyang Yi","submitted_at":"2018-07-18T23:45:33Z","abstract_excerpt":"We study the problem of learning similarity functions over very large corpora using neural network embedding models. These models are typically trained using SGD with sampling of random observed and unobserved pairs, with a number of samples that grows quadratically with the corpus size, making it expensive to scale to very large corpora. We propose new efficient methods to train these models without having to sample unobserved pairs. Inspired by matrix factorization, our approach relies on adding a global quadratic penalty to all pairs of examples and expressing this term as the matrix-inner-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1807.07187","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1807.07187","created_at":"2026-05-18T00:10:20.974250+00:00"},{"alias_kind":"arxiv_version","alias_value":"1807.07187v1","created_at":"2026-05-18T00:10:20.974250+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1807.07187","created_at":"2026-05-18T00:10:20.974250+00:00"},{"alias_kind":"pith_short_12","alias_value":"OD6773CYL5SC","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"OD6773CYL5SC5OAU","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"OD6773CY","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2","json":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2.json","graph_json":"https://pith.science/api/pith-number/OD6773CYL5SC5OAUJPE4TOKFL2/graph.json","events_json":"https://pith.science/api/pith-number/OD6773CYL5SC5OAUJPE4TOKFL2/events.json","paper":"https://pith.science/paper/OD6773CY"},"agent_actions":{"view_html":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2","download_json":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2.json","view_paper":"https://pith.science/paper/OD6773CY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1807.07187&json=true","fetch_graph":"https://pith.science/api/pith-number/OD6773CYL5SC5OAUJPE4TOKFL2/graph.json","fetch_events":"https://pith.science/api/pith-number/OD6773CYL5SC5OAUJPE4TOKFL2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2/action/storage_attestation","attest_author":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2/action/author_attestation","sign_citation":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2/action/citation_signature","submit_replication":"https://pith.science/pith/OD6773CYL5SC5OAUJPE4TOKFL2/action/replication_record"}},"created_at":"2026-05-18T00:10:20.974250+00:00","updated_at":"2026-05-18T00:10:20.974250+00:00"}