{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:BXHWQKC65MYJ7X2Q323WOFUOC2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ee64273503258b17011d8b7004eef4510bbb6d7547a51272df9ef7d5138786a0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-23T11:06:55Z","title_canon_sha256":"628830331dd442aa3161551b23d61beb04f5c30c1d05020f1fcd32a1049b0073"},"schema_version":"1.0","source":{"id":"1810.09774","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.09774","created_at":"2026-05-17T23:44:33Z"},{"alias_kind":"arxiv_version","alias_value":"1810.09774v3","created_at":"2026-05-17T23:44:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.09774","created_at":"2026-05-17T23:44:33Z"},{"alias_kind":"pith_short_12","alias_value":"BXHWQKC65MYJ","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"BXHWQKC65MYJ7X2Q","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"BXHWQKC6","created_at":"2026-05-18T12:32:16Z"}],"graph_snapshots":[{"event_id":"sha256:247da3e5a2a8f836914dbf57c8054be838648fca5a60f456dda8be98ddd654d1","target":"graph","created_at":"2026-05-17T23:44:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Neural network models have been very successful in natural language inference, with the best models reaching 90% accuracy in some benchmarks. However, the success of these models turns out to be largely benchmark specific. We show that models trained on a natural language inference dataset drawn from one benchmark fail to perform well in others, even if the notion of inference assumed in these benchmarks is the same or similar. We train six high performing neural network models on different datasets and show that each one of these has problems of generalizing when we replace the original test ","authors_text":"Aarne Talman, Stergios Chatzikyriakidis","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-23T11:06:55Z","title":"Testing the Generalization Power of Neural Network Models Across NLI Benchmarks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.09774","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fbd7c5a2d5677e0cd8758c1392170f33c205a4a07d8629ce1427070bdf89c264","target":"record","created_at":"2026-05-17T23:44:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ee64273503258b17011d8b7004eef4510bbb6d7547a51272df9ef7d5138786a0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-10-23T11:06:55Z","title_canon_sha256":"628830331dd442aa3161551b23d61beb04f5c30c1d05020f1fcd32a1049b0073"},"schema_version":"1.0","source":{"id":"1810.09774","kind":"arxiv","version":3}},"canonical_sha256":"0dcf68285eeb309fdf50deb767168e16809045b7a8e3013816888e90e8e735ed","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0dcf68285eeb309fdf50deb767168e16809045b7a8e3013816888e90e8e735ed","first_computed_at":"2026-05-17T23:44:33.916472Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:33.916472Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gZ1TTHYhOxAuaX7qa830rs9Crix1Lr8HebBlsTeorT/Ps3iOrhi1Tl0OZGqtGACDGQ1KEeKHpOXNdQXe3RMEAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:33.917162Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.09774","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fbd7c5a2d5677e0cd8758c1392170f33c205a4a07d8629ce1427070bdf89c264","sha256:247da3e5a2a8f836914dbf57c8054be838648fca5a60f456dda8be98ddd654d1"],"state_sha256":"76fed49280b7a7f95e454208f20224ad542ea8cba960397e801b3fdea07cb795"}