{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:G4KD3KSDT3RXAAEZ5KNB3EQWZ7","short_pith_number":"pith:G4KD3KSD","canonical_record":{"source":{"id":"1906.01575","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-04T16:41:15Z","cross_cats_sorted":[],"title_canon_sha256":"5b9bb8f4b8d376a2d41f23cd4d376164eb6840ac9327e972750b183291b49067","abstract_canon_sha256":"e30551acdb401b079e65c41f0fa3bafaee7da9e8ee7c903a85b68702a47a5faf"},"schema_version":"1.0"},"canonical_sha256":"37143daa439ee3700099ea9a1d9216cfe827bb85feb37c563efe450fa0cf68cd","source":{"kind":"arxiv","id":"1906.01575","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.01575","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"arxiv_version","alias_value":"1906.01575v1","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.01575","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"pith_short_12","alias_value":"G4KD3KSDT3RX","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"G4KD3KSDT3RXAAEZ","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"G4KD3KSD","created_at":"2026-05-18T12:33:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:G4KD3KSDT3RXAAEZ5KNB3EQWZ7","target":"record","payload":{"canonical_record":{"source":{"id":"1906.01575","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-04T16:41:15Z","cross_cats_sorted":[],"title_canon_sha256":"5b9bb8f4b8d376a2d41f23cd4d376164eb6840ac9327e972750b183291b49067","abstract_canon_sha256":"e30551acdb401b079e65c41f0fa3bafaee7da9e8ee7c903a85b68702a47a5faf"},"schema_version":"1.0"},"canonical_sha256":"37143daa439ee3700099ea9a1d9216cfe827bb85feb37c563efe450fa0cf68cd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:16.094237Z","signature_b64":"UJKJgrc4eIcMAO1SDxMXpKvYGHWhxNC+gNTVYNxB0+Zj4ycNqAHd4+yPkSvmvXEb4BJahwYf8cgVzuGDeMeuDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"37143daa439ee3700099ea9a1d9216cfe827bb85feb37c563efe450fa0cf68cd","last_reissued_at":"2026-05-17T23:44:16.093737Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:16.093737Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.01575","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3TOOeVge+r9IEjVofsFDFKrGVQ2KLUq8DfJ0w8otfOCIsiSz7BKyntcaByYvx7ZgpYNXK854Jn890xzLRH1ABg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T02:39:42.475829Z"},"content_sha256":"a457601b2caf8c6345a4dbd489b5dcc27725a5831a192c5b6d54fc1091e94a73","schema_version":"1.0","event_id":"sha256:a457601b2caf8c6345a4dbd489b5dcc27725a5831a192c5b6d54fc1091e94a73"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:G4KD3KSDT3RXAAEZ5KNB3EQWZ7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Pitfalls in the Evaluation of Sentence Embeddings","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Andreas R\\\"uckl\\'e, Iryna Gurevych, Steffen Eger","submitted_at":"2019-06-04T16:41:15Z","abstract_excerpt":"Deep learning models continuously break new records across different NLP tasks. At the same time, their success exposes weaknesses of model evaluation. Here, we compile several key pitfalls of evaluation of sentence embeddings, a currently very popular NLP paradigm. These pitfalls include the comparison of embeddings of different sizes, normalization of embeddings, and the low (and diverging) correlations between transfer and probing tasks. Our motivation is to challenge the current evaluation of sentence embeddings and to provide an easy-to-access reference for future research. Based on our i"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.01575","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"noEJUxPZTOzcxg0mF/F0L3eXcjRhm0wSXL7nNNMerbWJc1rCmqrXNvjIwi/xW85HeSYWCR8XaGiEs1j5AtCCDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T02:39:42.476557Z"},"content_sha256":"55af46d2b2f0274d1affc9bd80cbdc26283e2707c83420e181ed54d71482d59d","schema_version":"1.0","event_id":"sha256:55af46d2b2f0274d1affc9bd80cbdc26283e2707c83420e181ed54d71482d59d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/bundle.json","state_url":"https://pith.science/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T02:39:42Z","links":{"resolver":"https://pith.science/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7","bundle":"https://pith.science/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/bundle.json","state":"https://pith.science/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/G4KD3KSDT3RXAAEZ5KNB3EQWZ7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:G4KD3KSDT3RXAAEZ5KNB3EQWZ7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e30551acdb401b079e65c41f0fa3bafaee7da9e8ee7c903a85b68702a47a5faf","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-04T16:41:15Z","title_canon_sha256":"5b9bb8f4b8d376a2d41f23cd4d376164eb6840ac9327e972750b183291b49067"},"schema_version":"1.0","source":{"id":"1906.01575","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.01575","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"arxiv_version","alias_value":"1906.01575v1","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.01575","created_at":"2026-05-17T23:44:16Z"},{"alias_kind":"pith_short_12","alias_value":"G4KD3KSDT3RX","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"G4KD3KSDT3RXAAEZ","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"G4KD3KSD","created_at":"2026-05-18T12:33:18Z"}],"graph_snapshots":[{"event_id":"sha256:55af46d2b2f0274d1affc9bd80cbdc26283e2707c83420e181ed54d71482d59d","target":"graph","created_at":"2026-05-17T23:44:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep learning models continuously break new records across different NLP tasks. At the same time, their success exposes weaknesses of model evaluation. Here, we compile several key pitfalls of evaluation of sentence embeddings, a currently very popular NLP paradigm. These pitfalls include the comparison of embeddings of different sizes, normalization of embeddings, and the low (and diverging) correlations between transfer and probing tasks. Our motivation is to challenge the current evaluation of sentence embeddings and to provide an easy-to-access reference for future research. Based on our i","authors_text":"Andreas R\\\"uckl\\'e, Iryna Gurevych, Steffen Eger","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-04T16:41:15Z","title":"Pitfalls in the Evaluation of Sentence Embeddings"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.01575","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a457601b2caf8c6345a4dbd489b5dcc27725a5831a192c5b6d54fc1091e94a73","target":"record","created_at":"2026-05-17T23:44:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e30551acdb401b079e65c41f0fa3bafaee7da9e8ee7c903a85b68702a47a5faf","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-04T16:41:15Z","title_canon_sha256":"5b9bb8f4b8d376a2d41f23cd4d376164eb6840ac9327e972750b183291b49067"},"schema_version":"1.0","source":{"id":"1906.01575","kind":"arxiv","version":1}},"canonical_sha256":"37143daa439ee3700099ea9a1d9216cfe827bb85feb37c563efe450fa0cf68cd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"37143daa439ee3700099ea9a1d9216cfe827bb85feb37c563efe450fa0cf68cd","first_computed_at":"2026-05-17T23:44:16.093737Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:16.093737Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UJKJgrc4eIcMAO1SDxMXpKvYGHWhxNC+gNTVYNxB0+Zj4ycNqAHd4+yPkSvmvXEb4BJahwYf8cgVzuGDeMeuDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:16.094237Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.01575","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a457601b2caf8c6345a4dbd489b5dcc27725a5831a192c5b6d54fc1091e94a73","sha256:55af46d2b2f0274d1affc9bd80cbdc26283e2707c83420e181ed54d71482d59d"],"state_sha256":"e3663458b83906b676f1c20ca56d12a95cbcad47154a26ec2b6b18a97ae90a37"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8CStz4wyslfc5iDbwkungORKMFtb22OduGcelVf6Xc2faVGULHCRGoH5NgpbchaWOI9wqlWpMDPNL+RMOOE8Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T02:39:42.480269Z","bundle_sha256":"feca97028e6ce572251541ebff9b7b1cb294a655a8774e676827bba7328782fe"}}