{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:MDV73Y2DKSYSPIQL5IIIPKIICK","short_pith_number":"pith:MDV73Y2D","schema_version":"1.0","canonical_sha256":"60ebfde34354b127a20bea1087a90812bfda2bbb545c34f3a692bbe053037db4","source":{"kind":"arxiv","id":"2510.11713","version":4},"attestation_state":"computed","paper":{"title":"Are Large Reasoning Models Interruptible?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"David M. Chan, Joseph E. Gonzalez, Mihran Miroyan, Narges Norouzi, Trevor Darrell, Tsung-Han Wu","submitted_at":"2025-10-13T17:59:35Z","abstract_excerpt":"Real-world applications of Large Reasoning Models (LRMs) often require reasoning about changing prompts or environments. In this work, we challenge the frozen world assumption and evaluate LRM robustness under two realistic dynamic scenarios: interruptions, which test the accuracy of model responses under budget-constrained outputs, and dynamic context, which tests model adaptation to in-flight changes. Across mathematics and programming benchmarks that require long-form reasoning, static evaluations consistently overestimate robustness: even state-of-the-art LRMs, which achieve high accuracy "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.11713","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-10-13T17:59:35Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"732702dbd845fadd5e636a942888285ac11459abc44514e7930b02769598b5a4","abstract_canon_sha256":"c4d30597c6974d4877017e59e636aeba33d53925548bc8fddee4453dc27dae83"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T03:05:03.993294Z","signature_b64":"Y91R/G9PB660L3QCeAkRKyr1lcJOhcZ3Kb00qfdUnWDc8Cu3EnsI8qUfui9aeWHMxhQoaZmBTov02toWGcL7Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"60ebfde34354b127a20bea1087a90812bfda2bbb545c34f3a692bbe053037db4","last_reissued_at":"2026-06-02T03:05:03.992752Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T03:05:03.992752Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Are Large Reasoning Models Interruptible?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"David M. Chan, Joseph E. Gonzalez, Mihran Miroyan, Narges Norouzi, Trevor Darrell, Tsung-Han Wu","submitted_at":"2025-10-13T17:59:35Z","abstract_excerpt":"Real-world applications of Large Reasoning Models (LRMs) often require reasoning about changing prompts or environments. In this work, we challenge the frozen world assumption and evaluate LRM robustness under two realistic dynamic scenarios: interruptions, which test the accuracy of model responses under budget-constrained outputs, and dynamic context, which tests model adaptation to in-flight changes. Across mathematics and programming benchmarks that require long-form reasoning, static evaluations consistently overestimate robustness: even state-of-the-art LRMs, which achieve high accuracy "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.11713","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.11713/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.11713","created_at":"2026-06-02T03:05:03.992823+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.11713v4","created_at":"2026-06-02T03:05:03.992823+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.11713","created_at":"2026-06-02T03:05:03.992823+00:00"},{"alias_kind":"pith_short_12","alias_value":"MDV73Y2DKSYS","created_at":"2026-06-02T03:05:03.992823+00:00"},{"alias_kind":"pith_short_16","alias_value":"MDV73Y2DKSYSPIQL","created_at":"2026-06-02T03:05:03.992823+00:00"},{"alias_kind":"pith_short_8","alias_value":"MDV73Y2D","created_at":"2026-06-02T03:05:03.992823+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK","json":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK.json","graph_json":"https://pith.science/api/pith-number/MDV73Y2DKSYSPIQL5IIIPKIICK/graph.json","events_json":"https://pith.science/api/pith-number/MDV73Y2DKSYSPIQL5IIIPKIICK/events.json","paper":"https://pith.science/paper/MDV73Y2D"},"agent_actions":{"view_html":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK","download_json":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK.json","view_paper":"https://pith.science/paper/MDV73Y2D","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.11713&json=true","fetch_graph":"https://pith.science/api/pith-number/MDV73Y2DKSYSPIQL5IIIPKIICK/graph.json","fetch_events":"https://pith.science/api/pith-number/MDV73Y2DKSYSPIQL5IIIPKIICK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK/action/storage_attestation","attest_author":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK/action/author_attestation","sign_citation":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK/action/citation_signature","submit_replication":"https://pith.science/pith/MDV73Y2DKSYSPIQL5IIIPKIICK/action/replication_record"}},"created_at":"2026-06-02T03:05:03.992823+00:00","updated_at":"2026-06-02T03:05:03.992823+00:00"}