{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:MA33J7FOYDVH6LJM5L5DHPX7QQ","short_pith_number":"pith:MA33J7FO","schema_version":"1.0","canonical_sha256":"6037b4fcaec0ea7f2d2ceafa33beff8439d246c11cbd29b539e69f709ac539c3","source":{"kind":"arxiv","id":"1811.00937","version":2},"attestation_state":"computed","paper":{"title":"CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Alon Talmor, Jonathan Berant, Jonathan Herzig, Nicholas Lourie","submitted_at":"2018-11-02T15:34:29Z","abstract_excerpt":"When answering a question, people often draw upon their rich world knowledge in addition to the particular context. Recent work has focused primarily on answering questions given some relevant document or context, and required very little general background. To investigate question answering with prior knowledge, we present CommonsenseQA: a challenging new dataset for commonsense question answering. To capture common sense beyond associations, we extract from ConceptNet (Speer et al., 2017) multiple target concepts that have the same semantic relation to a single source concept. Crowd-workers "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.00937","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-11-02T15:34:29Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"db9d0e0afdba8b3eda6450bea3be99938c01e47364ace17491cc4400a062b55e","abstract_canon_sha256":"c9771d5ee33a347a3f6990a858f344cf6e0f3933574e72eb76d03b3430b2f19d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:51:09.499986Z","signature_b64":"hgmjr0zz3H8APsv+vMdqASS6c189fqQsaIhQjtHrSq+vwwmB97kmd1GC/q84my79xfgvOFyRU9hhxc+9QmGoCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6037b4fcaec0ea7f2d2ceafa33beff8439d246c11cbd29b539e69f709ac539c3","last_reissued_at":"2026-05-17T23:51:09.499336Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:51:09.499336Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Alon Talmor, Jonathan Berant, Jonathan Herzig, Nicholas Lourie","submitted_at":"2018-11-02T15:34:29Z","abstract_excerpt":"When answering a question, people often draw upon their rich world knowledge in addition to the particular context. Recent work has focused primarily on answering questions given some relevant document or context, and required very little general background. To investigate question answering with prior knowledge, we present CommonsenseQA: a challenging new dataset for commonsense question answering. To capture common sense beyond associations, we extract from ConceptNet (Speer et al., 2017) multiple target concepts that have the same semantic relation to a single source concept. Crowd-workers "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.00937","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.00937","created_at":"2026-05-17T23:51:09.499449+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.00937v2","created_at":"2026-05-17T23:51:09.499449+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.00937","created_at":"2026-05-17T23:51:09.499449+00:00"},{"alias_kind":"pith_short_12","alias_value":"MA33J7FOYDVH","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_16","alias_value":"MA33J7FOYDVH6LJM","created_at":"2026-05-18T12:32:37.024351+00:00"},{"alias_kind":"pith_short_8","alias_value":"MA33J7FO","created_at":"2026-05-18T12:32:37.024351+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":22,"internal_anchor_count":20,"sample":[{"citing_arxiv_id":"2110.14168","citing_title":"Training Verifiers to Solve Math Word Problems","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2310.06825","citing_title":"Mistral 7B","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2312.10997","citing_title":"Retrieval-Augmented Generation for Large Language Models: A Survey","ref_index":132,"is_internal_anchor":true},{"citing_arxiv_id":"2401.04088","citing_title":"Mixtral of Experts","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2406.13621","citing_title":"LaMI: Augmenting Large Language Models via Late Multi-Image Fusion","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2411.11707","citing_title":"Federated Co-tuning Framework for Large and Small Language Models","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2502.00270","citing_title":"DUET: Optimizing Training Data Mixtures via Feedback from Unseen Evaluation Tasks","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2502.12120","citing_title":"LLMs on the Line: Data Determines Loss-to-Loss Scaling Laws","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2504.16155","citing_title":"PRIMETIME : Limits of LLMs in Temporal Primitives","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2507.20906","citing_title":"Soft Head Selection for Injecting ICL-Derived Task Embeddings","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2508.08127","citing_title":"BlindGuard: Safeguarding LLM-based Multi-Agent Systems under Unknown Attacks","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2509.24496","citing_title":"LLM DNA: Tracing Model Evolution via Functional Representations","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2511.02627","citing_title":"DecompSR: A dataset for decomposed analyses of compositional multihop spatial reasoning","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2310.16789","citing_title":"Detecting Pretraining Data from Large Language Models","ref_index":124,"is_internal_anchor":true},{"citing_arxiv_id":"2306.14048","citing_title":"H$_2$O: Heavy-Hitter Oracle for Efficient Generative Inference of Large Language Models","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2511.21613","citing_title":"Beyond URLs: Metadata Diversity and Position for Efficient LLM Pretraining","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2512.13751","citing_title":"MIDUS: Memory-Infused Depth Up-Scaling","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2509.19349","citing_title":"ShinkaEvolve: Towards Open-Ended And Sample-Efficient Program Evolution","ref_index":257,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04944","citing_title":"Inclusion-of-Thoughts: Mitigating Preference Instability via Purifying the Decision Space","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2603.25412","citing_title":"Beyond Content Safety: Real-Time Monitoring for Reasoning Vulnerabilities in Large Language Models","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2211.09085","citing_title":"Galactica: A Large Language Model for Science","ref_index":241,"is_internal_anchor":false},{"citing_arxiv_id":"2604.12946","citing_title":"Parcae: Scaling Laws For Stable Looped Language Models","ref_index":77,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ","json":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ.json","graph_json":"https://pith.science/api/pith-number/MA33J7FOYDVH6LJM5L5DHPX7QQ/graph.json","events_json":"https://pith.science/api/pith-number/MA33J7FOYDVH6LJM5L5DHPX7QQ/events.json","paper":"https://pith.science/paper/MA33J7FO"},"agent_actions":{"view_html":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ","download_json":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ.json","view_paper":"https://pith.science/paper/MA33J7FO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.00937&json=true","fetch_graph":"https://pith.science/api/pith-number/MA33J7FOYDVH6LJM5L5DHPX7QQ/graph.json","fetch_events":"https://pith.science/api/pith-number/MA33J7FOYDVH6LJM5L5DHPX7QQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ/action/storage_attestation","attest_author":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ/action/author_attestation","sign_citation":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ/action/citation_signature","submit_replication":"https://pith.science/pith/MA33J7FOYDVH6LJM5L5DHPX7QQ/action/replication_record"}},"created_at":"2026-05-17T23:51:09.499449+00:00","updated_at":"2026-05-17T23:51:09.499449+00:00"}