{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:QWWIGDUQ23VOGQ3XNGWFTFKL6U","short_pith_number":"pith:QWWIGDUQ","schema_version":"1.0","canonical_sha256":"85ac830e90d6eae3437769ac59954bf513966891869cbba6f89b9fbf66e53239","source":{"kind":"arxiv","id":"1512.08183","version":5},"attestation_state":"computed","paper":{"title":"Learning Document Embeddings by Predicting N-grams for Sentiment Classification of Long Movie Reviews","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bofang Li, Deyuan Zhang, Tao Liu, Xiaoyong Du, Zhe Zhao","submitted_at":"2015-12-27T08:12:53Z","abstract_excerpt":"Despite the loss of semantic information, bag-of-ngram based methods still achieve state-of-the-art results for tasks such as sentiment classification of long movie reviews. Many document embeddings methods have been proposed to capture semantics, but they still can't outperform bag-of-ngram based methods on this task. In this paper, we modify the architecture of the recently proposed Paragraph Vector, allowing it to learn document vectors by predicting not only words, but n-gram features as well. Our model is able to capture both semantics and word order in documents while keeping the express"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1512.08183","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-12-27T08:12:53Z","cross_cats_sorted":[],"title_canon_sha256":"5b78c48173a77e8a007f75f6646c9e2cf0d6664395cd2a8db3b23bb0692cd5e7","abstract_canon_sha256":"de48e26e764d205e9657f58ce3bf9f47c9f82285e437a12d3829fe0d765e2cb3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:16:24.416892Z","signature_b64":"kXtvN2+IQb6/zvaDtgLOF1G8+U7IcZPf8qYbZGNaY0BIMMEhQOKIwd9gZuYYRI7zaZx83x5m11T0DuSqyw/sAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"85ac830e90d6eae3437769ac59954bf513966891869cbba6f89b9fbf66e53239","last_reissued_at":"2026-05-18T01:16:24.416162Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:16:24.416162Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Document Embeddings by Predicting N-grams for Sentiment Classification of Long Movie Reviews","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bofang Li, Deyuan Zhang, Tao Liu, Xiaoyong Du, Zhe Zhao","submitted_at":"2015-12-27T08:12:53Z","abstract_excerpt":"Despite the loss of semantic information, bag-of-ngram based methods still achieve state-of-the-art results for tasks such as sentiment classification of long movie reviews. Many document embeddings methods have been proposed to capture semantics, but they still can't outperform bag-of-ngram based methods on this task. In this paper, we modify the architecture of the recently proposed Paragraph Vector, allowing it to learn document vectors by predicting not only words, but n-gram features as well. Our model is able to capture both semantics and word order in documents while keeping the express"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.08183","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1512.08183","created_at":"2026-05-18T01:16:24.416274+00:00"},{"alias_kind":"arxiv_version","alias_value":"1512.08183v5","created_at":"2026-05-18T01:16:24.416274+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.08183","created_at":"2026-05-18T01:16:24.416274+00:00"},{"alias_kind":"pith_short_12","alias_value":"QWWIGDUQ23VO","created_at":"2026-05-18T12:29:39.896362+00:00"},{"alias_kind":"pith_short_16","alias_value":"QWWIGDUQ23VOGQ3X","created_at":"2026-05-18T12:29:39.896362+00:00"},{"alias_kind":"pith_short_8","alias_value":"QWWIGDUQ","created_at":"2026-05-18T12:29:39.896362+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U","json":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U.json","graph_json":"https://pith.science/api/pith-number/QWWIGDUQ23VOGQ3XNGWFTFKL6U/graph.json","events_json":"https://pith.science/api/pith-number/QWWIGDUQ23VOGQ3XNGWFTFKL6U/events.json","paper":"https://pith.science/paper/QWWIGDUQ"},"agent_actions":{"view_html":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U","download_json":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U.json","view_paper":"https://pith.science/paper/QWWIGDUQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1512.08183&json=true","fetch_graph":"https://pith.science/api/pith-number/QWWIGDUQ23VOGQ3XNGWFTFKL6U/graph.json","fetch_events":"https://pith.science/api/pith-number/QWWIGDUQ23VOGQ3XNGWFTFKL6U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U/action/storage_attestation","attest_author":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U/action/author_attestation","sign_citation":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U/action/citation_signature","submit_replication":"https://pith.science/pith/QWWIGDUQ23VOGQ3XNGWFTFKL6U/action/replication_record"}},"created_at":"2026-05-18T01:16:24.416274+00:00","updated_at":"2026-05-18T01:16:24.416274+00:00"}