{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:OM4WY75O2D5OIEOUTQAAUQCDJJ","short_pith_number":"pith:OM4WY75O","schema_version":"1.0","canonical_sha256":"73396c7faed0fae411d49c000a40434a7dd7230b0d8bd95a7905a9e2ba3a4542","source":{"kind":"arxiv","id":"1902.01007","version":4},"attestation_state":"computed","paper":{"title":"Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in Natural Language Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Ellie Pavlick, R. Thomas McCoy, Tal Linzen","submitted_at":"2019-02-04T01:54:19Z","abstract_excerpt":"A machine learning system can score well on a given test set by relying on heuristics that are effective for frequent example types but break down in more challenging cases. We study this issue within natural language inference (NLI), the task of determining whether one sentence entails another. We hypothesize that statistical NLI models may adopt three fallible syntactic heuristics: the lexical overlap heuristic, the subsequence heuristic, and the constituent heuristic. To determine whether models have adopted these heuristics, we introduce a controlled evaluation set called HANS (Heuristic A"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.01007","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-02-04T01:54:19Z","cross_cats_sorted":[],"title_canon_sha256":"8c129c52b457f7a027e17c971ae939891db4edbe6984e7411fee9da979fd3398","abstract_canon_sha256":"0bc1c1177e871f39f6ab32cfb2c41a453038e44f70faf368ea01e28b7515e356"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:42.414477Z","signature_b64":"iuNCoUG+sppGvgfOdHs5OsHtLf1QJE8Kc59izVRaRFDUGwXsM/eqll3JhMrHWllqAXIlUNz5oZMKlpf2bjUxCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"73396c7faed0fae411d49c000a40434a7dd7230b0d8bd95a7905a9e2ba3a4542","last_reissued_at":"2026-05-17T23:42:42.413854Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:42.413854Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in Natural Language Inference","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Ellie Pavlick, R. Thomas McCoy, Tal Linzen","submitted_at":"2019-02-04T01:54:19Z","abstract_excerpt":"A machine learning system can score well on a given test set by relying on heuristics that are effective for frequent example types but break down in more challenging cases. We study this issue within natural language inference (NLI), the task of determining whether one sentence entails another. We hypothesize that statistical NLI models may adopt three fallible syntactic heuristics: the lexical overlap heuristic, the subsequence heuristic, and the constituent heuristic. To determine whether models have adopted these heuristics, we introduce a controlled evaluation set called HANS (Heuristic A"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.01007","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.01007","created_at":"2026-05-17T23:42:42.413946+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.01007v4","created_at":"2026-05-17T23:42:42.413946+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.01007","created_at":"2026-05-17T23:42:42.413946+00:00"},{"alias_kind":"pith_short_12","alias_value":"OM4WY75O2D5O","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_16","alias_value":"OM4WY75O2D5OIEOU","created_at":"2026-05-18T12:33:24.271573+00:00"},{"alias_kind":"pith_short_8","alias_value":"OM4WY75O","created_at":"2026-05-18T12:33:24.271573+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":10,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2605.17829","citing_title":"Interactive Evaluation Requires a Design Science","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2506.02671","citing_title":"Test-Time Distillation for Continual Model Adaptation","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2508.16860","citing_title":"TriagerX: Dual Transformers for Bug Triaging Tasks with Content and Interaction Based Rankings","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2512.10421","citing_title":"Neural Collapse in Test-Time Adaptation","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"1905.00537","citing_title":"SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems","ref_index":124,"is_internal_anchor":true},{"citing_arxiv_id":"2110.08207","citing_title":"Multitask Prompted Training Enables Zero-Shot Task Generalization","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"1905.10044","citing_title":"BoolQ: Exploring the Surprising Difficulty of Natural Yes/No Questions","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"2605.11501","citing_title":"Decaf: Improving Neural Decompilation with Automatic Feedback and Search","ref_index":28,"is_internal_anchor":false},{"citing_arxiv_id":"2005.14165","citing_title":"Language Models are Few-Shot Learners","ref_index":53,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02658","citing_title":"Deciphering Shortcut Learning from an Evolutionary Game Theory Perspective","ref_index":16,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ","json":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ.json","graph_json":"https://pith.science/api/pith-number/OM4WY75O2D5OIEOUTQAAUQCDJJ/graph.json","events_json":"https://pith.science/api/pith-number/OM4WY75O2D5OIEOUTQAAUQCDJJ/events.json","paper":"https://pith.science/paper/OM4WY75O"},"agent_actions":{"view_html":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ","download_json":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ.json","view_paper":"https://pith.science/paper/OM4WY75O","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.01007&json=true","fetch_graph":"https://pith.science/api/pith-number/OM4WY75O2D5OIEOUTQAAUQCDJJ/graph.json","fetch_events":"https://pith.science/api/pith-number/OM4WY75O2D5OIEOUTQAAUQCDJJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ/action/storage_attestation","attest_author":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ/action/author_attestation","sign_citation":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ/action/citation_signature","submit_replication":"https://pith.science/pith/OM4WY75O2D5OIEOUTQAAUQCDJJ/action/replication_record"}},"created_at":"2026-05-17T23:42:42.413946+00:00","updated_at":"2026-05-17T23:42:42.413946+00:00"}