{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:EFESYLIEUID4M3ZJ2ZFADX5DZB","short_pith_number":"pith:EFESYLIE","canonical_record":{"source":{"id":"1804.11297","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-30T16:19:51Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"3bcb9ffd9f62c082ac2099f38fee9a2e451615179ac21279d77d225617840a29","abstract_canon_sha256":"d638082b0bcde14c31274c222860824f4353fce2144a9a059a94739b1c51ed26"},"schema_version":"1.0"},"canonical_sha256":"21492c2d04a207c66f29d64a01dfa3c86cdb1b2dcec64298229e0086771dda27","source":{"kind":"arxiv","id":"1804.11297","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.11297","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"arxiv_version","alias_value":"1804.11297v2","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.11297","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"pith_short_12","alias_value":"EFESYLIEUID4","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EFESYLIEUID4M3ZJ","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EFESYLIE","created_at":"2026-05-18T12:32:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:EFESYLIEUID4M3ZJ2ZFADX5DZB","target":"record","payload":{"canonical_record":{"source":{"id":"1804.11297","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-30T16:19:51Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"3bcb9ffd9f62c082ac2099f38fee9a2e451615179ac21279d77d225617840a29","abstract_canon_sha256":"d638082b0bcde14c31274c222860824f4353fce2144a9a059a94739b1c51ed26"},"schema_version":"1.0"},"canonical_sha256":"21492c2d04a207c66f29d64a01dfa3c86cdb1b2dcec64298229e0086771dda27","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:07:27.680720Z","signature_b64":"feiihu/Q7DVawArzgZP3w3+CR9hKyqnqSgEqhpt5EQ++dseY/UoADAZCQEcnDulBJvMXbyeXJyTIhaw79CsACQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"21492c2d04a207c66f29d64a01dfa3c86cdb1b2dcec64298229e0086771dda27","last_reissued_at":"2026-05-18T00:07:27.680004Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:07:27.680004Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.11297","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:07:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WuFuwbv0wWuiAUoq/kNW7QZCPxAidefzGerhBHA/ltyoxUJFDpDSO7eJhHQAUKYOvcz21Sam0PHdPzE2R0mRBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T02:22:14.305480Z"},"content_sha256":"faa2a8f60ecb66160d9e75a22a12724c5d37812217c18b314542a09afef4d168","schema_version":"1.0","event_id":"sha256:faa2a8f60ecb66160d9e75a22a12724c5d37812217c18b314542a09afef4d168"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:EFESYLIEUID4M3ZJ2ZFADX5DZB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sampling strategies in Siamese Networks for unsupervised speech representation learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Corentin Dancette, Emmanuel Dupoux, Julien Karadayi, Neil Zeghidour, Rachid Riad, Thomas Schatz","submitted_at":"2018-04-30T16:19:51Z","abstract_excerpt":"Recent studies have investigated siamese network architectures for learning invariant speech representations using same-different side information at the word level. Here we investigate systematically an often ignored component of siamese networks: the sampling procedure (how pairs of same vs. different tokens are selected). We show that sampling strategies taking into account Zipf's Law, the distribution of speakers and the proportions of same and different pairs of words significantly impact the performance of the network. In particular, we show that word frequency compression improves learn"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.11297","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:07:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RV3jI8w6xnbxjM2Q4vCD6qK34kaeXWWBSXF3iPTZIdcL7S3YU8ag6KGpREymGP5UVXgWa1muU4K8yN85KidjCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-20T02:22:14.305849Z"},"content_sha256":"5d05c792aaee37df3080513bd740261fb4491b0e89dffe9c309e2b4410feb5b4","schema_version":"1.0","event_id":"sha256:5d05c792aaee37df3080513bd740261fb4491b0e89dffe9c309e2b4410feb5b4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/bundle.json","state_url":"https://pith.science/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-20T02:22:14Z","links":{"resolver":"https://pith.science/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB","bundle":"https://pith.science/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/bundle.json","state":"https://pith.science/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EFESYLIEUID4M3ZJ2ZFADX5DZB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:EFESYLIEUID4M3ZJ2ZFADX5DZB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d638082b0bcde14c31274c222860824f4353fce2144a9a059a94739b1c51ed26","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-30T16:19:51Z","title_canon_sha256":"3bcb9ffd9f62c082ac2099f38fee9a2e451615179ac21279d77d225617840a29"},"schema_version":"1.0","source":{"id":"1804.11297","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.11297","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"arxiv_version","alias_value":"1804.11297v2","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.11297","created_at":"2026-05-18T00:07:27Z"},{"alias_kind":"pith_short_12","alias_value":"EFESYLIEUID4","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"EFESYLIEUID4M3ZJ","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"EFESYLIE","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:5d05c792aaee37df3080513bd740261fb4491b0e89dffe9c309e2b4410feb5b4","target":"graph","created_at":"2026-05-18T00:07:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent studies have investigated siamese network architectures for learning invariant speech representations using same-different side information at the word level. Here we investigate systematically an often ignored component of siamese networks: the sampling procedure (how pairs of same vs. different tokens are selected). We show that sampling strategies taking into account Zipf's Law, the distribution of speakers and the proportions of same and different pairs of words significantly impact the performance of the network. In particular, we show that word frequency compression improves learn","authors_text":"Corentin Dancette, Emmanuel Dupoux, Julien Karadayi, Neil Zeghidour, Rachid Riad, Thomas Schatz","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-30T16:19:51Z","title":"Sampling strategies in Siamese Networks for unsupervised speech representation learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.11297","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:faa2a8f60ecb66160d9e75a22a12724c5d37812217c18b314542a09afef4d168","target":"record","created_at":"2026-05-18T00:07:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d638082b0bcde14c31274c222860824f4353fce2144a9a059a94739b1c51ed26","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-30T16:19:51Z","title_canon_sha256":"3bcb9ffd9f62c082ac2099f38fee9a2e451615179ac21279d77d225617840a29"},"schema_version":"1.0","source":{"id":"1804.11297","kind":"arxiv","version":2}},"canonical_sha256":"21492c2d04a207c66f29d64a01dfa3c86cdb1b2dcec64298229e0086771dda27","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"21492c2d04a207c66f29d64a01dfa3c86cdb1b2dcec64298229e0086771dda27","first_computed_at":"2026-05-18T00:07:27.680004Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:07:27.680004Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"feiihu/Q7DVawArzgZP3w3+CR9hKyqnqSgEqhpt5EQ++dseY/UoADAZCQEcnDulBJvMXbyeXJyTIhaw79CsACQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:07:27.680720Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.11297","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:faa2a8f60ecb66160d9e75a22a12724c5d37812217c18b314542a09afef4d168","sha256:5d05c792aaee37df3080513bd740261fb4491b0e89dffe9c309e2b4410feb5b4"],"state_sha256":"a66e1764be427a94a0278a48c92438cb047d85820ad62228169ab78e6ba9a42b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WLeqKEaTkTAqzwFaUieei3/hfnU+I/G2CMHFCdPyR3zbWjLZD8M6QJUcHiggBrmVCCvW0V7o5/Opto7x6Qa6Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-20T02:22:14.308968Z","bundle_sha256":"00211260bd00f2455bd0bd21fbe8a4b0ce0a6e5535eee36e3b4f698e267f6583"}}