{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:VLZCI6PMJ5A4XYWNQT3LWDY7MG","short_pith_number":"pith:VLZCI6PM","canonical_record":{"source":{"id":"1512.00765","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-12-02T16:31:20Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"fc0e0e388061c454b291c0b7dacadc9d263952ca4cb79be94620206145ce06f5","abstract_canon_sha256":"55032dd276530c2eb0a204769af1444c91e6261d7100324fc0139c86ed07d11a"},"schema_version":"1.0"},"canonical_sha256":"aaf22479ec4f41cbe2cd84f6bb0f1f61b71c16ad237b4874724764c0763aa597","source":{"kind":"arxiv","id":"1512.00765","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.00765","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"arxiv_version","alias_value":"1512.00765v1","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.00765","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"pith_short_12","alias_value":"VLZCI6PMJ5A4","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_16","alias_value":"VLZCI6PMJ5A4XYWN","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_8","alias_value":"VLZCI6PM","created_at":"2026-05-18T12:29:47Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:VLZCI6PMJ5A4XYWNQT3LWDY7MG","target":"record","payload":{"canonical_record":{"source":{"id":"1512.00765","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-12-02T16:31:20Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"fc0e0e388061c454b291c0b7dacadc9d263952ca4cb79be94620206145ce06f5","abstract_canon_sha256":"55032dd276530c2eb0a204769af1444c91e6261d7100324fc0139c86ed07d11a"},"schema_version":"1.0"},"canonical_sha256":"aaf22479ec4f41cbe2cd84f6bb0f1f61b71c16ad237b4874724764c0763aa597","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:25:25.147045Z","signature_b64":"WlULYSK7pSH7Z2C7rcCYNrYsvfVSfeg70uQt8GnNSkx/bZGt/7nr2jRDwUZKzaQtS02HPGoVC/4aQX0/V3y+Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aaf22479ec4f41cbe2cd84f6bb0f1f61b71c16ad237b4874724764c0763aa597","last_reissued_at":"2026-05-18T01:25:25.146434Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:25:25.146434Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1512.00765","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:25:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PvXcoSiTowygStZ5k2E0I3VyPBALGt233Hr19a4f1Nnmwc4ol0EU/G11xroncKzAmm8icV+EZYZ8y4f1C4d5CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T18:00:11.199000Z"},"content_sha256":"4ec7ac698745da93b7cec040f0e46a9274107032e1bc60799b9b26b6ca960907","schema_version":"1.0","event_id":"sha256:4ec7ac698745da93b7cec040f0e46a9274107032e1bc60799b9b26b6ca960907"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:VLZCI6PMJ5A4XYWNQT3LWDY7MG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Semantic Similarity for Very Short Texts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.IR","authors_text":"Bart Dhoedt, Cedric De Boom, Steven Bohez, Steven Van Canneyt, Thomas Demeester","submitted_at":"2015-12-02T16:31:20Z","abstract_excerpt":"Levering data on social media, such as Twitter and Facebook, requires information retrieval algorithms to become able to relate very short text fragments to each other. Traditional text similarity methods such as tf-idf cosine-similarity, based on word overlap, mostly fail to produce good results in this case, since word overlap is little or non-existent. Recently, distributed word representations, or word embeddings, have been shown to successfully allow words to match on the semantic level. In order to pair short text fragments - as a concatenation of separate words - an adequate distributed"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.00765","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:25:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Txji7YEraCYWgi56AtS9gBLCnDns5P1Dtgs4gXkQYoPEBEb+3bjPPknj5nqJib+ir/oWfV3pA2a56Hjcly3HDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T18:00:11.199378Z"},"content_sha256":"4a484fa0e2a21e8b088912d0a4474217ca843e32f0917bbec503811aa6ec22f1","schema_version":"1.0","event_id":"sha256:4a484fa0e2a21e8b088912d0a4474217ca843e32f0917bbec503811aa6ec22f1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/bundle.json","state_url":"https://pith.science/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T18:00:11Z","links":{"resolver":"https://pith.science/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG","bundle":"https://pith.science/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/bundle.json","state":"https://pith.science/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VLZCI6PMJ5A4XYWNQT3LWDY7MG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:VLZCI6PMJ5A4XYWNQT3LWDY7MG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"55032dd276530c2eb0a204769af1444c91e6261d7100324fc0139c86ed07d11a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-12-02T16:31:20Z","title_canon_sha256":"fc0e0e388061c454b291c0b7dacadc9d263952ca4cb79be94620206145ce06f5"},"schema_version":"1.0","source":{"id":"1512.00765","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.00765","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"arxiv_version","alias_value":"1512.00765v1","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.00765","created_at":"2026-05-18T01:25:25Z"},{"alias_kind":"pith_short_12","alias_value":"VLZCI6PMJ5A4","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_16","alias_value":"VLZCI6PMJ5A4XYWN","created_at":"2026-05-18T12:29:47Z"},{"alias_kind":"pith_short_8","alias_value":"VLZCI6PM","created_at":"2026-05-18T12:29:47Z"}],"graph_snapshots":[{"event_id":"sha256:4a484fa0e2a21e8b088912d0a4474217ca843e32f0917bbec503811aa6ec22f1","target":"graph","created_at":"2026-05-18T01:25:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Levering data on social media, such as Twitter and Facebook, requires information retrieval algorithms to become able to relate very short text fragments to each other. Traditional text similarity methods such as tf-idf cosine-similarity, based on word overlap, mostly fail to produce good results in this case, since word overlap is little or non-existent. Recently, distributed word representations, or word embeddings, have been shown to successfully allow words to match on the semantic level. In order to pair short text fragments - as a concatenation of separate words - an adequate distributed","authors_text":"Bart Dhoedt, Cedric De Boom, Steven Bohez, Steven Van Canneyt, Thomas Demeester","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-12-02T16:31:20Z","title":"Learning Semantic Similarity for Very Short Texts"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.00765","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4ec7ac698745da93b7cec040f0e46a9274107032e1bc60799b9b26b6ca960907","target":"record","created_at":"2026-05-18T01:25:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"55032dd276530c2eb0a204769af1444c91e6261d7100324fc0139c86ed07d11a","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-12-02T16:31:20Z","title_canon_sha256":"fc0e0e388061c454b291c0b7dacadc9d263952ca4cb79be94620206145ce06f5"},"schema_version":"1.0","source":{"id":"1512.00765","kind":"arxiv","version":1}},"canonical_sha256":"aaf22479ec4f41cbe2cd84f6bb0f1f61b71c16ad237b4874724764c0763aa597","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"aaf22479ec4f41cbe2cd84f6bb0f1f61b71c16ad237b4874724764c0763aa597","first_computed_at":"2026-05-18T01:25:25.146434Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:25:25.146434Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WlULYSK7pSH7Z2C7rcCYNrYsvfVSfeg70uQt8GnNSkx/bZGt/7nr2jRDwUZKzaQtS02HPGoVC/4aQX0/V3y+Dg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:25:25.147045Z","signed_message":"canonical_sha256_bytes"},"source_id":"1512.00765","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4ec7ac698745da93b7cec040f0e46a9274107032e1bc60799b9b26b6ca960907","sha256:4a484fa0e2a21e8b088912d0a4474217ca843e32f0917bbec503811aa6ec22f1"],"state_sha256":"73ddd6a16cbbebe1e5897a1ae80bb1ac1056aa03ed400b39cc76e9592535e562"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LlO9eILgaQJpdl2bU2JAK/I98XmQvyn9uvV4sEF7x/YJARcf9KgvR2Ai8k8wx4cvlJAFzoc86bflFu+KO+hODA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T18:00:11.201395Z","bundle_sha256":"a528b6a943d679ad74ff459803d26df70daafe38afba9088cea80f9bd6c2e39b"}}