{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV","short_pith_number":"pith:MMQ3ZJ7C","canonical_record":{"source":{"id":"1709.09500","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-27T13:31:41Z","cross_cats_sorted":[],"title_canon_sha256":"4952eb60f29af6f2920720148b266c3d07d9c3e3d5865b1b250df7ac33adf7f9","abstract_canon_sha256":"7616db58bc8531f22662efb0a7faad8f559c07a7b54767a628371bdcf2abf93c"},"schema_version":"1.0"},"canonical_sha256":"6321bca7e2b68f6fcb37ddadac8b4d3d68c0418e2723bc9e98cea18af50fd567","source":{"kind":"arxiv","id":"1709.09500","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.09500","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"arxiv_version","alias_value":"1709.09500v1","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.09500","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"pith_short_12","alias_value":"MMQ3ZJ7CW2HW","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MMQ3ZJ7CW2HW7SZX","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MMQ3ZJ7C","created_at":"2026-05-18T12:31:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV","target":"record","payload":{"canonical_record":{"source":{"id":"1709.09500","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-27T13:31:41Z","cross_cats_sorted":[],"title_canon_sha256":"4952eb60f29af6f2920720148b266c3d07d9c3e3d5865b1b250df7ac33adf7f9","abstract_canon_sha256":"7616db58bc8531f22662efb0a7faad8f559c07a7b54767a628371bdcf2abf93c"},"schema_version":"1.0"},"canonical_sha256":"6321bca7e2b68f6fcb37ddadac8b4d3d68c0418e2723bc9e98cea18af50fd567","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:34:11.531274Z","signature_b64":"k9kc623Et+qCVJ/cnpdCs2W5VFf0yHQ8hu15KgfDPDFMoqGGNsQpmP4Fofr3hDAsxeeC8N2fL4vI6Rf8Vc9sAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6321bca7e2b68f6fcb37ddadac8b4d3d68c0418e2723bc9e98cea18af50fd567","last_reissued_at":"2026-05-18T00:34:11.530713Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:34:11.530713Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.09500","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PX31EiI1QF5OG1XLsxoQ/nC8m/LGfhzVlrmbp8teRXj0PbaqcdGBFeqHvdG994XyaTSnGxn/kJLFT6dM1830BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T06:29:10.060797Z"},"content_sha256":"2fe0f19247e5d0f0492cf0aa1fcc932aeeb13e92e168a077740be05844648512","schema_version":"1.0","event_id":"sha256:2fe0f19247e5d0f0492cf0aa1fcc932aeeb13e92e168a077740be05844648512"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Replicability Analysis for Natural Language Processing: Testing Significance with Multiple Datasets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Gili Baumer, Marina Bogomolov, Roi Reichart, Rotem Dror","submitted_at":"2017-09-27T13:31:41Z","abstract_excerpt":"With the ever-growing amounts of textual data from a large variety of languages, domains, and genres, it has become standard to evaluate NLP algorithms on multiple datasets in order to ensure consistent performance across heterogeneous setups. However, such multiple comparisons pose significant challenges to traditional statistical analysis methods in NLP and can lead to erroneous conclusions. In this paper, we propose a Replicability Analysis framework for a statistically sound analysis of multiple comparisons between algorithms for NLP tasks. We discuss the theoretical advantages of this fra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.09500","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MGmoFs4sLgTxH03SrRxSibkURfJhymi7SS88HppWrf9l38ttjDBkl0PdVOQggWvfi0SKxFm2w+ehtMr5PTRIDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T06:29:10.061138Z"},"content_sha256":"792af424e1e7c16712fc41df763f3cb0631ee6131d685b2a1c5c3f6c053e131a","schema_version":"1.0","event_id":"sha256:792af424e1e7c16712fc41df763f3cb0631ee6131d685b2a1c5c3f6c053e131a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/bundle.json","state_url":"https://pith.science/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T06:29:10Z","links":{"resolver":"https://pith.science/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV","bundle":"https://pith.science/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/bundle.json","state":"https://pith.science/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:MMQ3ZJ7CW2HW7SZX3WW2ZC2NHV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7616db58bc8531f22662efb0a7faad8f559c07a7b54767a628371bdcf2abf93c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-27T13:31:41Z","title_canon_sha256":"4952eb60f29af6f2920720148b266c3d07d9c3e3d5865b1b250df7ac33adf7f9"},"schema_version":"1.0","source":{"id":"1709.09500","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.09500","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"arxiv_version","alias_value":"1709.09500v1","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.09500","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"pith_short_12","alias_value":"MMQ3ZJ7CW2HW","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MMQ3ZJ7CW2HW7SZX","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MMQ3ZJ7C","created_at":"2026-05-18T12:31:31Z"}],"graph_snapshots":[{"event_id":"sha256:792af424e1e7c16712fc41df763f3cb0631ee6131d685b2a1c5c3f6c053e131a","target":"graph","created_at":"2026-05-18T00:34:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"With the ever-growing amounts of textual data from a large variety of languages, domains, and genres, it has become standard to evaluate NLP algorithms on multiple datasets in order to ensure consistent performance across heterogeneous setups. However, such multiple comparisons pose significant challenges to traditional statistical analysis methods in NLP and can lead to erroneous conclusions. In this paper, we propose a Replicability Analysis framework for a statistically sound analysis of multiple comparisons between algorithms for NLP tasks. We discuss the theoretical advantages of this fra","authors_text":"Gili Baumer, Marina Bogomolov, Roi Reichart, Rotem Dror","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-27T13:31:41Z","title":"Replicability Analysis for Natural Language Processing: Testing Significance with Multiple Datasets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.09500","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2fe0f19247e5d0f0492cf0aa1fcc932aeeb13e92e168a077740be05844648512","target":"record","created_at":"2026-05-18T00:34:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7616db58bc8531f22662efb0a7faad8f559c07a7b54767a628371bdcf2abf93c","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-27T13:31:41Z","title_canon_sha256":"4952eb60f29af6f2920720148b266c3d07d9c3e3d5865b1b250df7ac33adf7f9"},"schema_version":"1.0","source":{"id":"1709.09500","kind":"arxiv","version":1}},"canonical_sha256":"6321bca7e2b68f6fcb37ddadac8b4d3d68c0418e2723bc9e98cea18af50fd567","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6321bca7e2b68f6fcb37ddadac8b4d3d68c0418e2723bc9e98cea18af50fd567","first_computed_at":"2026-05-18T00:34:11.530713Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:34:11.530713Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"k9kc623Et+qCVJ/cnpdCs2W5VFf0yHQ8hu15KgfDPDFMoqGGNsQpmP4Fofr3hDAsxeeC8N2fL4vI6Rf8Vc9sAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:34:11.531274Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.09500","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2fe0f19247e5d0f0492cf0aa1fcc932aeeb13e92e168a077740be05844648512","sha256:792af424e1e7c16712fc41df763f3cb0631ee6131d685b2a1c5c3f6c053e131a"],"state_sha256":"8718b38774ca4de5e4544114f40dedc06a175e2ddef85c3c22a43dfc801ab7d0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KsxYBOSVJrlsy5uoUJlxmwLoax9v4bYTf0MC0jdN5Md2a5iBkBDPaqd8pN3j9+EtgVacezkYkS18xftW9/h2Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T06:29:10.063020Z","bundle_sha256":"9ce2401eddcafed8995ea5a2f537bd76c6e4f446d317d430fe649a302aff3dce"}}