{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:C3SQDOQFJREZSUEILIHYEGRLRW","short_pith_number":"pith:C3SQDOQF","canonical_record":{"source":{"id":"1806.00692","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-02T19:14:39Z","cross_cats_sorted":[],"title_canon_sha256":"79bef1e41b1f7359d70ca49705e08c70d39543390570e898c51855d44859f0ed","abstract_canon_sha256":"ce0a5463d201e6ab1f99c083d0c6819bd78bc6571e9ab5e7377703552ccf1a62"},"schema_version":"1.0"},"canonical_sha256":"16e501ba054c499950885a0f821a2b8d99e1b0ffa8fb6fa7861d5e7721502202","source":{"kind":"arxiv","id":"1806.00692","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00692","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00692v3","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00692","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"pith_short_12","alias_value":"C3SQDOQFJREZ","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"C3SQDOQFJREZSUEI","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"C3SQDOQF","created_at":"2026-05-18T12:32:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:C3SQDOQFJREZSUEILIHYEGRLRW","target":"record","payload":{"canonical_record":{"source":{"id":"1806.00692","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-02T19:14:39Z","cross_cats_sorted":[],"title_canon_sha256":"79bef1e41b1f7359d70ca49705e08c70d39543390570e898c51855d44859f0ed","abstract_canon_sha256":"ce0a5463d201e6ab1f99c083d0c6819bd78bc6571e9ab5e7377703552ccf1a62"},"schema_version":"1.0"},"canonical_sha256":"16e501ba054c499950885a0f821a2b8d99e1b0ffa8fb6fa7861d5e7721502202","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:17.025517Z","signature_b64":"HioZojupFJ4OsDmeqvK9FmGgy/OIHLv3NHBnvcgwsSOR+6aU8W30kYO4jKmp6p3/pWWVp86BElWfQ+cMZ8RNCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"16e501ba054c499950885a0f821a2b8d99e1b0ffa8fb6fa7861d5e7721502202","last_reissued_at":"2026-05-18T00:13:17.024758Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:17.024758Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.00692","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lsxPhCqGEulpYx8vWQxvqlecpB3BaxHrxEFVZs0SvZ1KbH+nKBk7+sCRLT+Y0dx+G/i1JdvX7w4yOjuBdTvWDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T13:19:59.445082Z"},"content_sha256":"2e22aa51052c9914a6a6df85bc5f0d468a11fe3265ea391acf27a65466ecb5bf","schema_version":"1.0","event_id":"sha256:2e22aa51052c9914a6a6df85bc5f0d468a11fe3265ea391acf27a65466ecb5bf"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:C3SQDOQFJREZSUEILIHYEGRLRW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Stress Test Evaluation for Natural Language Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Aakanksha Naik, Abhilasha Ravichander, Carolyn Rose, Graham Neubig, Norman Sadeh","submitted_at":"2018-06-02T19:14:39Z","abstract_excerpt":"Natural language inference (NLI) is the task of determining if a natural language hypothesis can be inferred from a given premise in a justifiable manner. NLI was proposed as a benchmark task for natural language understanding. Existing models perform well at standard datasets for NLI, achieving impressive results across different genres of text. However, the extent to which these models understand the semantic content of sentences is unclear. In this work, we propose an evaluation methodology consisting of automatically constructed \"stress tests\" that allow us to examine whether systems have "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00692","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fuATF61T3Z0mtjXT9insPtqxS+CXE7har9Tjzk55rsvWpgOVCS4snajZBAZaGru00x4l/pi637cbq4EVayVcBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T13:19:59.445443Z"},"content_sha256":"ec34442910f3d53a97e5df3f9aeedddc4feac9d23686fa2e6d056e8a5d0e44ed","schema_version":"1.0","event_id":"sha256:ec34442910f3d53a97e5df3f9aeedddc4feac9d23686fa2e6d056e8a5d0e44ed"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/C3SQDOQFJREZSUEILIHYEGRLRW/bundle.json","state_url":"https://pith.science/pith/C3SQDOQFJREZSUEILIHYEGRLRW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/C3SQDOQFJREZSUEILIHYEGRLRW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T13:19:59Z","links":{"resolver":"https://pith.science/pith/C3SQDOQFJREZSUEILIHYEGRLRW","bundle":"https://pith.science/pith/C3SQDOQFJREZSUEILIHYEGRLRW/bundle.json","state":"https://pith.science/pith/C3SQDOQFJREZSUEILIHYEGRLRW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/C3SQDOQFJREZSUEILIHYEGRLRW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:C3SQDOQFJREZSUEILIHYEGRLRW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ce0a5463d201e6ab1f99c083d0c6819bd78bc6571e9ab5e7377703552ccf1a62","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-02T19:14:39Z","title_canon_sha256":"79bef1e41b1f7359d70ca49705e08c70d39543390570e898c51855d44859f0ed"},"schema_version":"1.0","source":{"id":"1806.00692","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.00692","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.00692v3","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00692","created_at":"2026-05-18T00:13:17Z"},{"alias_kind":"pith_short_12","alias_value":"C3SQDOQFJREZ","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"C3SQDOQFJREZSUEI","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"C3SQDOQF","created_at":"2026-05-18T12:32:16Z"}],"graph_snapshots":[{"event_id":"sha256:ec34442910f3d53a97e5df3f9aeedddc4feac9d23686fa2e6d056e8a5d0e44ed","target":"graph","created_at":"2026-05-18T00:13:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Natural language inference (NLI) is the task of determining if a natural language hypothesis can be inferred from a given premise in a justifiable manner. NLI was proposed as a benchmark task for natural language understanding. Existing models perform well at standard datasets for NLI, achieving impressive results across different genres of text. However, the extent to which these models understand the semantic content of sentences is unclear. In this work, we propose an evaluation methodology consisting of automatically constructed \"stress tests\" that allow us to examine whether systems have ","authors_text":"Aakanksha Naik, Abhilasha Ravichander, Carolyn Rose, Graham Neubig, Norman Sadeh","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-02T19:14:39Z","title":"Stress Test Evaluation for Natural Language Inference"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00692","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2e22aa51052c9914a6a6df85bc5f0d468a11fe3265ea391acf27a65466ecb5bf","target":"record","created_at":"2026-05-18T00:13:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ce0a5463d201e6ab1f99c083d0c6819bd78bc6571e9ab5e7377703552ccf1a62","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-02T19:14:39Z","title_canon_sha256":"79bef1e41b1f7359d70ca49705e08c70d39543390570e898c51855d44859f0ed"},"schema_version":"1.0","source":{"id":"1806.00692","kind":"arxiv","version":3}},"canonical_sha256":"16e501ba054c499950885a0f821a2b8d99e1b0ffa8fb6fa7861d5e7721502202","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"16e501ba054c499950885a0f821a2b8d99e1b0ffa8fb6fa7861d5e7721502202","first_computed_at":"2026-05-18T00:13:17.024758Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:17.024758Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HioZojupFJ4OsDmeqvK9FmGgy/OIHLv3NHBnvcgwsSOR+6aU8W30kYO4jKmp6p3/pWWVp86BElWfQ+cMZ8RNCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:17.025517Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.00692","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2e22aa51052c9914a6a6df85bc5f0d468a11fe3265ea391acf27a65466ecb5bf","sha256:ec34442910f3d53a97e5df3f9aeedddc4feac9d23686fa2e6d056e8a5d0e44ed"],"state_sha256":"8c1ab7c97d39e8f677a5e81aeeabdad949bd061abbb6d127c473663db5f3421a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eET06gHJ2G/pzVKhMCxQ4yPu/MUFUSDJhytIDij8K1ngMzuo86T2z+aiNh7PBPurs3EFgEuB/qKQeu0eTChACQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T13:19:59.448378Z","bundle_sha256":"faed78ae9ef73d01c1bdd71b5958f1e080030dc06114cee1f0d32131cf4352b7"}}