{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:SX2HRV7IS5NOZRDHPQD7CD5OFU","short_pith_number":"pith:SX2HRV7I","schema_version":"1.0","canonical_sha256":"95f478d7e8975aecc4677c07f10fae2d373bec7b86807295554c2714693bb726","source":{"kind":"arxiv","id":"2606.27103","version":1},"attestation_state":"computed","paper":{"title":"The Riddle Riddle: Testing Flexible Reasoning in Large Language Models and Humans","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bella Fascendini, Kathryn McGregor, Max D. Gupta, Thomas L. Griffiths","submitted_at":"2026-06-25T14:41:12Z","abstract_excerpt":"Humans flexibly adapt their reasoning strategies to the requirements of a given problem. Large language models (LLMs) have performed well on many cognitive tasks, however, it is unclear whether this accuracy is a result of pattern matching from training data or flexible reasoning. Here, we introduce a novel paradigm to test this question: the riddle riddle paradigm. Riddle riddles are word problems written to mimic popular riddles, but altered so their answers only require literal interpretations. Identifying correct answers requires looking past the structure of each question and flexibly app"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.27103","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-25T14:41:12Z","cross_cats_sorted":[],"title_canon_sha256":"f509cd3e0d3f84f4e77d77397b4a00be9e98a72912fc08b087ad49cf345f2421","abstract_canon_sha256":"6eb7ca4b4d13abcbcf7784ead08ffebcf28dcd0185ca21c72d229e914bd54d9c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:09.414300Z","signature_b64":"IZx9WoPNzJ7FWm3kHwWqvkthW2tMqZYSA+wKp2z2jtmjh7akbDqLCy6m3DQUrfL8gMLIdIAohUQW3AhLg8QpBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"95f478d7e8975aecc4677c07f10fae2d373bec7b86807295554c2714693bb726","last_reissued_at":"2026-06-26T01:16:09.413933Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:09.413933Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Riddle Riddle: Testing Flexible Reasoning in Large Language Models and Humans","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bella Fascendini, Kathryn McGregor, Max D. Gupta, Thomas L. Griffiths","submitted_at":"2026-06-25T14:41:12Z","abstract_excerpt":"Humans flexibly adapt their reasoning strategies to the requirements of a given problem. Large language models (LLMs) have performed well on many cognitive tasks, however, it is unclear whether this accuracy is a result of pattern matching from training data or flexible reasoning. Here, we introduce a novel paradigm to test this question: the riddle riddle paradigm. Riddle riddles are word problems written to mimic popular riddles, but altered so their answers only require literal interpretations. Identifying correct answers requires looking past the structure of each question and flexibly app"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.27103","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.27103/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.27103","created_at":"2026-06-26T01:16:09.413990+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.27103v1","created_at":"2026-06-26T01:16:09.413990+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.27103","created_at":"2026-06-26T01:16:09.413990+00:00"},{"alias_kind":"pith_short_12","alias_value":"SX2HRV7IS5NO","created_at":"2026-06-26T01:16:09.413990+00:00"},{"alias_kind":"pith_short_16","alias_value":"SX2HRV7IS5NOZRDH","created_at":"2026-06-26T01:16:09.413990+00:00"},{"alias_kind":"pith_short_8","alias_value":"SX2HRV7I","created_at":"2026-06-26T01:16:09.413990+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU","json":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU.json","graph_json":"https://pith.science/api/pith-number/SX2HRV7IS5NOZRDHPQD7CD5OFU/graph.json","events_json":"https://pith.science/api/pith-number/SX2HRV7IS5NOZRDHPQD7CD5OFU/events.json","paper":"https://pith.science/paper/SX2HRV7I"},"agent_actions":{"view_html":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU","download_json":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU.json","view_paper":"https://pith.science/paper/SX2HRV7I","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.27103&json=true","fetch_graph":"https://pith.science/api/pith-number/SX2HRV7IS5NOZRDHPQD7CD5OFU/graph.json","fetch_events":"https://pith.science/api/pith-number/SX2HRV7IS5NOZRDHPQD7CD5OFU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU/action/storage_attestation","attest_author":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU/action/author_attestation","sign_citation":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU/action/citation_signature","submit_replication":"https://pith.science/pith/SX2HRV7IS5NOZRDHPQD7CD5OFU/action/replication_record"}},"created_at":"2026-06-26T01:16:09.413990+00:00","updated_at":"2026-06-26T01:16:09.413990+00:00"}