{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:5DIHYPBSEDKWZK7SMXV3QEDNJ4","short_pith_number":"pith:5DIHYPBS","canonical_record":{"source":{"id":"1612.06778","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-12-20T17:38:57Z","cross_cats_sorted":[],"title_canon_sha256":"59baf916dba4858c399c9ceab92303afce6633e35d3510105ab82cd911d9e040","abstract_canon_sha256":"3871b1a5503e0fbedc82fbaee9e2366bf7540453dc2649072ccc5cccfdd772da"},"schema_version":"1.0"},"canonical_sha256":"e8d07c3c3220d56cabf265ebb8106d4f05a906a9d5aafbf973dc25c18efc6b6f","source":{"kind":"arxiv","id":"1612.06778","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1612.06778","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"arxiv_version","alias_value":"1612.06778v3","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.06778","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"pith_short_12","alias_value":"5DIHYPBSEDKW","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_16","alias_value":"5DIHYPBSEDKWZK7S","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_8","alias_value":"5DIHYPBS","created_at":"2026-05-18T12:30:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:5DIHYPBSEDKWZK7SMXV3QEDNJ4","target":"record","payload":{"canonical_record":{"source":{"id":"1612.06778","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-12-20T17:38:57Z","cross_cats_sorted":[],"title_canon_sha256":"59baf916dba4858c399c9ceab92303afce6633e35d3510105ab82cd911d9e040","abstract_canon_sha256":"3871b1a5503e0fbedc82fbaee9e2366bf7540453dc2649072ccc5cccfdd772da"},"schema_version":"1.0"},"canonical_sha256":"e8d07c3c3220d56cabf265ebb8106d4f05a906a9d5aafbf973dc25c18efc6b6f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:44:38.728584Z","signature_b64":"Wc1dqRgeZkrojyt54ZD8+c9b87IFlnNlnInn4tN18aQW4ODlSjwEyixZxLR4s8iFD9jUPUi6rfOcCI3KGMzjAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8d07c3c3220d56cabf265ebb8106d4f05a906a9d5aafbf973dc25c18efc6b6f","last_reissued_at":"2026-05-18T00:44:38.728121Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:44:38.728121Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1612.06778","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LyfuMzPKuQyVgnYXvK9W+2ivvtjLRlUh3qpH94/J76CLkKzYNTj21Rj5zD+jxnyA+jycgeFnTnf2Hf3zjPjACw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T17:50:04.533869Z"},"content_sha256":"116918cac44818577da1c09af9a546d7fa4e150ab9a4c6727a4e772912d5d78c","schema_version":"1.0","event_id":"sha256:116918cac44818577da1c09af9a546d7fa4e150ab9a4c6727a4e772912d5d78c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:5DIHYPBSEDKWZK7SMXV3QEDNJ4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SCDV : Sparse Composite Document Vectors using soft clustering over distributional representations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bhargavi Paranjape, Dheeraj Mekala, Harish Karnick, Vivek Gupta","submitted_at":"2016-12-20T17:38:57Z","abstract_excerpt":"We present a feature vector formation technique for documents - Sparse Composite Document Vector (SCDV) - which overcomes several shortcomings of the current distributional paragraph vector representations that are widely used for text representation. In SCDV, word embedding's are clustered to capture multiple semantic contexts in which words occur. They are then chained together to form document topic-vectors that can express complex, multi-topic documents. Through extensive experiments on multi-class and multi-label classification tasks, we outperform the previous state-of-the-art method, NT"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.06778","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aDurfef6GeVJEpCjVTKRPE/TEToOvHHgol45stkvJ0gkRZlpnWSQqOsEoZKSBw8ACxnaxcgqNmvJGPJOoSdIDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T17:50:04.534241Z"},"content_sha256":"d807151ea26b245955ffda10e34d056ae4da1a77de5cba195f94de78efb54e02","schema_version":"1.0","event_id":"sha256:d807151ea26b245955ffda10e34d056ae4da1a77de5cba195f94de78efb54e02"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/bundle.json","state_url":"https://pith.science/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T17:50:04Z","links":{"resolver":"https://pith.science/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4","bundle":"https://pith.science/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/bundle.json","state":"https://pith.science/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5DIHYPBSEDKWZK7SMXV3QEDNJ4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:5DIHYPBSEDKWZK7SMXV3QEDNJ4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3871b1a5503e0fbedc82fbaee9e2366bf7540453dc2649072ccc5cccfdd772da","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-12-20T17:38:57Z","title_canon_sha256":"59baf916dba4858c399c9ceab92303afce6633e35d3510105ab82cd911d9e040"},"schema_version":"1.0","source":{"id":"1612.06778","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1612.06778","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"arxiv_version","alias_value":"1612.06778v3","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.06778","created_at":"2026-05-18T00:44:38Z"},{"alias_kind":"pith_short_12","alias_value":"5DIHYPBSEDKW","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_16","alias_value":"5DIHYPBSEDKWZK7S","created_at":"2026-05-18T12:30:01Z"},{"alias_kind":"pith_short_8","alias_value":"5DIHYPBS","created_at":"2026-05-18T12:30:01Z"}],"graph_snapshots":[{"event_id":"sha256:d807151ea26b245955ffda10e34d056ae4da1a77de5cba195f94de78efb54e02","target":"graph","created_at":"2026-05-18T00:44:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present a feature vector formation technique for documents - Sparse Composite Document Vector (SCDV) - which overcomes several shortcomings of the current distributional paragraph vector representations that are widely used for text representation. In SCDV, word embedding's are clustered to capture multiple semantic contexts in which words occur. They are then chained together to form document topic-vectors that can express complex, multi-topic documents. Through extensive experiments on multi-class and multi-label classification tasks, we outperform the previous state-of-the-art method, NT","authors_text":"Bhargavi Paranjape, Dheeraj Mekala, Harish Karnick, Vivek Gupta","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-12-20T17:38:57Z","title":"SCDV : Sparse Composite Document Vectors using soft clustering over distributional representations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.06778","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:116918cac44818577da1c09af9a546d7fa4e150ab9a4c6727a4e772912d5d78c","target":"record","created_at":"2026-05-18T00:44:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3871b1a5503e0fbedc82fbaee9e2366bf7540453dc2649072ccc5cccfdd772da","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-12-20T17:38:57Z","title_canon_sha256":"59baf916dba4858c399c9ceab92303afce6633e35d3510105ab82cd911d9e040"},"schema_version":"1.0","source":{"id":"1612.06778","kind":"arxiv","version":3}},"canonical_sha256":"e8d07c3c3220d56cabf265ebb8106d4f05a906a9d5aafbf973dc25c18efc6b6f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e8d07c3c3220d56cabf265ebb8106d4f05a906a9d5aafbf973dc25c18efc6b6f","first_computed_at":"2026-05-18T00:44:38.728121Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:44:38.728121Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Wc1dqRgeZkrojyt54ZD8+c9b87IFlnNlnInn4tN18aQW4ODlSjwEyixZxLR4s8iFD9jUPUi6rfOcCI3KGMzjAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:44:38.728584Z","signed_message":"canonical_sha256_bytes"},"source_id":"1612.06778","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:116918cac44818577da1c09af9a546d7fa4e150ab9a4c6727a4e772912d5d78c","sha256:d807151ea26b245955ffda10e34d056ae4da1a77de5cba195f94de78efb54e02"],"state_sha256":"fa0620eb804ab266d1a43a5cd5d4026fdf04d94cf9c1406c0ba9e1f1420360d0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"D0/0OsrRwxJ/4oLlDJSskMGMYHR5fRjTb5YNsIyn5xvXa+03hQJGc1+ugUtDv9pytBq2g5opLuMlBWwfbOVnDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T17:50:04.537297Z","bundle_sha256":"d36ca4c487ede46a58fbab29b7347dafdb465c0042a78d73b0f2e62bb83f70cf"}}