{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:ZHU6TP4WPS7TE6OBHWH4KRAVEF","short_pith_number":"pith:ZHU6TP4W","schema_version":"1.0","canonical_sha256":"c9e9e9bf967cbf3279c13d8fc54415216388cf25b05669c585b0f86469789e40","source":{"kind":"arxiv","id":"1707.07328","version":1},"attestation_state":"computed","paper":{"title":"Adversarial Examples for Evaluating Reading Comprehension Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Percy Liang, Robin Jia","submitted_at":"2017-07-23T18:26:29Z","abstract_excerpt":"Standard accuracy metrics indicate that reading comprehension systems are making rapid progress, but the extent to which these systems truly understand language remains unclear. To reward systems with real language understanding abilities, we propose an adversarial evaluation scheme for the Stanford Question Answering Dataset (SQuAD). Our method tests whether systems can answer questions about paragraphs that contain adversarially inserted sentences, which are automatically generated to distract computer systems without changing the correct answer or misleading humans. In this adversarial sett"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1707.07328","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-23T18:26:29Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"aa3de0dd7967a7905ff3d8172ce45205f48851de6f020331efdeb03d5337ad2f","abstract_canon_sha256":"3d8559ef4e84454c14828ec67d2e5c5cd530c11ac17df879303ccaadaa3cdc28"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:42.566023Z","signature_b64":"PzuQgcsV9c8Yl4jQ+iK8V7LeHNKdujL5dJigHnjDLJnVaQ6o8SVJK8q0NW2Z90AV334f0KIT9wAMpKNA8RLjDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c9e9e9bf967cbf3279c13d8fc54415216388cf25b05669c585b0f86469789e40","last_reissued_at":"2026-05-18T00:39:42.565334Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:42.565334Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Adversarial Examples for Evaluating Reading Comprehension Systems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Percy Liang, Robin Jia","submitted_at":"2017-07-23T18:26:29Z","abstract_excerpt":"Standard accuracy metrics indicate that reading comprehension systems are making rapid progress, but the extent to which these systems truly understand language remains unclear. To reward systems with real language understanding abilities, we propose an adversarial evaluation scheme for the Stanford Question Answering Dataset (SQuAD). Our method tests whether systems can answer questions about paragraphs that contain adversarially inserted sentences, which are automatically generated to distract computer systems without changing the correct answer or misleading humans. In this adversarial sett"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.07328","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1707.07328","created_at":"2026-05-18T00:39:42.565453+00:00"},{"alias_kind":"arxiv_version","alias_value":"1707.07328v1","created_at":"2026-05-18T00:39:42.565453+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.07328","created_at":"2026-05-18T00:39:42.565453+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZHU6TP4WPS7T","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZHU6TP4WPS7TE6OB","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZHU6TP4W","created_at":"2026-05-18T12:31:59.375834+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2509.09192","citing_title":"ReDef: Do Code Language Models Truly Understand Code Changes for Just-in-Time Software Defect Prediction?","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12340","citing_title":"Online Learning-to-Defer with Varying Experts","ref_index":80,"is_internal_anchor":false},{"citing_arxiv_id":"2604.14634","citing_title":"Pushing the Boundaries of Multiple Choice Evaluation to One Hundred Options","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF","json":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF.json","graph_json":"https://pith.science/api/pith-number/ZHU6TP4WPS7TE6OBHWH4KRAVEF/graph.json","events_json":"https://pith.science/api/pith-number/ZHU6TP4WPS7TE6OBHWH4KRAVEF/events.json","paper":"https://pith.science/paper/ZHU6TP4W"},"agent_actions":{"view_html":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF","download_json":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF.json","view_paper":"https://pith.science/paper/ZHU6TP4W","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1707.07328&json=true","fetch_graph":"https://pith.science/api/pith-number/ZHU6TP4WPS7TE6OBHWH4KRAVEF/graph.json","fetch_events":"https://pith.science/api/pith-number/ZHU6TP4WPS7TE6OBHWH4KRAVEF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF/action/storage_attestation","attest_author":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF/action/author_attestation","sign_citation":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF/action/citation_signature","submit_replication":"https://pith.science/pith/ZHU6TP4WPS7TE6OBHWH4KRAVEF/action/replication_record"}},"created_at":"2026-05-18T00:39:42.565453+00:00","updated_at":"2026-05-18T00:39:42.565453+00:00"}