{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:4ZOVL5SSKOXTS5V56B262GZ3W6","short_pith_number":"pith:4ZOVL5SS","canonical_record":{"source":{"id":"1606.07869","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2016-06-25T04:35:47Z","cross_cats_sorted":[],"title_canon_sha256":"bb06da4ec22ebc1d3d6cbac42aadfb125bfdc1dbc3792ddd724cb87f2264d806","abstract_canon_sha256":"83da088945a9d887aa55da0eab207309ebb6540c8288f71c3beaafffc714e64c"},"schema_version":"1.0"},"canonical_sha256":"e65d55f65253af3976bdf075ed1b3bb7bd00ec5c4523cf425025148c8d2f2957","source":{"kind":"arxiv","id":"1606.07869","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.07869","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"arxiv_version","alias_value":"1606.07869v1","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.07869","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"pith_short_12","alias_value":"4ZOVL5SSKOXT","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_16","alias_value":"4ZOVL5SSKOXTS5V5","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_8","alias_value":"4ZOVL5SS","created_at":"2026-05-18T12:29:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:4ZOVL5SSKOXTS5V56B262GZ3W6","target":"record","payload":{"canonical_record":{"source":{"id":"1606.07869","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2016-06-25T04:35:47Z","cross_cats_sorted":[],"title_canon_sha256":"bb06da4ec22ebc1d3d6cbac42aadfb125bfdc1dbc3792ddd724cb87f2264d806","abstract_canon_sha256":"83da088945a9d887aa55da0eab207309ebb6540c8288f71c3beaafffc714e64c"},"schema_version":"1.0"},"canonical_sha256":"e65d55f65253af3976bdf075ed1b3bb7bd00ec5c4523cf425025148c8d2f2957","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:11:54.254338Z","signature_b64":"Uj81fxIV5Zu2slaCQWHljeaX+ErH8MupIkLciwUF5e7DbBRtta+wWXEOpDyJ9tQoEtR/KVsCIT7KTOBap8j8Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e65d55f65253af3976bdf075ed1b3bb7bd00ec5c4523cf425025148c8d2f2957","last_reissued_at":"2026-05-18T01:11:54.253971Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:11:54.253971Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1606.07869","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hQusjc8vTGFwj0ueSlXoSRW8QNY9HWBZgQCVF2IoDiNA4mVqaNcRZwUdH8VRfAbt6jNjjTMSzLR8R+u34zEwBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T03:13:45.378744Z"},"content_sha256":"c964e058df9f750a57a8cb59520652183c010ebe9b6ab6d0f7500454863f9f97","schema_version":"1.0","event_id":"sha256:c964e058df9f750a57a8cb59520652183c010ebe9b6ab6d0f7500454863f9f97"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:4ZOVL5SSKOXTS5V56B262GZ3W6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Representing Documents and Queries as Sets of Word Embedded Vectors for Information Retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Debasis Ganguly, Dwaipayan Roy, Gareth J.F. Jones, Mandar Mitra","submitted_at":"2016-06-25T04:35:47Z","abstract_excerpt":"A major difficulty in applying word vector embeddings in IR is in devising an effective and efficient strategy for obtaining representations of compound units of text, such as whole documents, (in comparison to the atomic words), for the purpose of indexing and scoring documents. Instead of striving for a suitable method for obtaining a single vector representation of a large document of text, we rather aim for developing a similarity metric that makes use of the similarities between the individual embedded word vectors in a document and a query. More specifically, we represent a document and "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.07869","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:11:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ASGSw63mhzo8ZykZ6I8Mu2tOYbIA2zGAKV0XeI6uUcLwGXM1O2KWf9ZB6+ij2nuXAzEed1ZIMtXwuDvrp2U/AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T03:13:45.379143Z"},"content_sha256":"858daa768e96ef468f8e18895dbc2036b68993e2536b0aaa1fce854bb0ac33af","schema_version":"1.0","event_id":"sha256:858daa768e96ef468f8e18895dbc2036b68993e2536b0aaa1fce854bb0ac33af"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/bundle.json","state_url":"https://pith.science/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T03:13:45Z","links":{"resolver":"https://pith.science/pith/4ZOVL5SSKOXTS5V56B262GZ3W6","bundle":"https://pith.science/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/bundle.json","state":"https://pith.science/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4ZOVL5SSKOXTS5V56B262GZ3W6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:4ZOVL5SSKOXTS5V56B262GZ3W6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"83da088945a9d887aa55da0eab207309ebb6540c8288f71c3beaafffc714e64c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2016-06-25T04:35:47Z","title_canon_sha256":"bb06da4ec22ebc1d3d6cbac42aadfb125bfdc1dbc3792ddd724cb87f2264d806"},"schema_version":"1.0","source":{"id":"1606.07869","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1606.07869","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"arxiv_version","alias_value":"1606.07869v1","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1606.07869","created_at":"2026-05-18T01:11:54Z"},{"alias_kind":"pith_short_12","alias_value":"4ZOVL5SSKOXT","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_16","alias_value":"4ZOVL5SSKOXTS5V5","created_at":"2026-05-18T12:29:58Z"},{"alias_kind":"pith_short_8","alias_value":"4ZOVL5SS","created_at":"2026-05-18T12:29:58Z"}],"graph_snapshots":[{"event_id":"sha256:858daa768e96ef468f8e18895dbc2036b68993e2536b0aaa1fce854bb0ac33af","target":"graph","created_at":"2026-05-18T01:11:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"A major difficulty in applying word vector embeddings in IR is in devising an effective and efficient strategy for obtaining representations of compound units of text, such as whole documents, (in comparison to the atomic words), for the purpose of indexing and scoring documents. Instead of striving for a suitable method for obtaining a single vector representation of a large document of text, we rather aim for developing a similarity metric that makes use of the similarities between the individual embedded word vectors in a document and a query. More specifically, we represent a document and ","authors_text":"Debasis Ganguly, Dwaipayan Roy, Gareth J.F. Jones, Mandar Mitra","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2016-06-25T04:35:47Z","title":"Representing Documents and Queries as Sets of Word Embedded Vectors for Information Retrieval"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1606.07869","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c964e058df9f750a57a8cb59520652183c010ebe9b6ab6d0f7500454863f9f97","target":"record","created_at":"2026-05-18T01:11:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"83da088945a9d887aa55da0eab207309ebb6540c8288f71c3beaafffc714e64c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2016-06-25T04:35:47Z","title_canon_sha256":"bb06da4ec22ebc1d3d6cbac42aadfb125bfdc1dbc3792ddd724cb87f2264d806"},"schema_version":"1.0","source":{"id":"1606.07869","kind":"arxiv","version":1}},"canonical_sha256":"e65d55f65253af3976bdf075ed1b3bb7bd00ec5c4523cf425025148c8d2f2957","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e65d55f65253af3976bdf075ed1b3bb7bd00ec5c4523cf425025148c8d2f2957","first_computed_at":"2026-05-18T01:11:54.253971Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:11:54.253971Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Uj81fxIV5Zu2slaCQWHljeaX+ErH8MupIkLciwUF5e7DbBRtta+wWXEOpDyJ9tQoEtR/KVsCIT7KTOBap8j8Dw==","signature_status":"signed_v1","signed_at":"2026-05-18T01:11:54.254338Z","signed_message":"canonical_sha256_bytes"},"source_id":"1606.07869","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c964e058df9f750a57a8cb59520652183c010ebe9b6ab6d0f7500454863f9f97","sha256:858daa768e96ef468f8e18895dbc2036b68993e2536b0aaa1fce854bb0ac33af"],"state_sha256":"f787680a792866acab0f8707a1522f4885b5f652f6d7a81769902c34da930fd2"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OsWvxW+d7L8hM2T7Dbyx52pSlROjk4hXLb80BiKFUH1oh3fQNbAPM25iTx5mOJFfRwJ9I4iOW8OgrBufsnabCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T03:13:45.381569Z","bundle_sha256":"736ba3fe0e39a6a48f4e55937c94ceaffa7d8c8544caa2baf46528c538fbef67"}}