{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:54U4UJWFPFMYC2CMHKUOC4OSEX","short_pith_number":"pith:54U4UJWF","schema_version":"1.0","canonical_sha256":"ef29ca26c5795981684c3aa8e171d225dd96cb49737bfc89ec22233b1957e086","source":{"kind":"arxiv","id":"2605.18549","version":1},"attestation_state":"computed","paper":{"title":"Monitoring the Internal Monologue: Probe Trajectories Reveal Reasoning Dynamics","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CR"],"primary_cat":"cs.CL","authors_text":"Aleksander Szymczyk, Maciej Chrab\\k{a}szcz, Marcin Sendera, Sebastian Cygert, Tomasz Trzci\\'nski","submitted_at":"2026-05-18T15:29:04Z","abstract_excerpt":"Large Reasoning Models (LRMs) introduce new opportunities for safety monitoring through their Chain of Thought (CoT) reasoning. However, CoT is not always faithful to the model's final output, undermining its reliability as a monitoring tool. To address this, we investigate the hidden representations of LRMs to determine whether future behavior can be predicted from prompt and CoT representations. By evaluating a probe at each generated token, we construct a probe trajectory, the continuous evolution of a concept's probability across the reasoning process. We find that future model behavior is"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18549","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-18T15:29:04Z","cross_cats_sorted":["cs.CR"],"title_canon_sha256":"b191ef90201871110066708a2d5c652285863cd538bb48ce8511af79324c8750","abstract_canon_sha256":"3c7106cf177554313769925cd74abd958c8f11da73c34d563804cf6ce1d44e40"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:06:07.208934Z","signature_b64":"KTEJSKX/99W+lBvszq6LHBpMf+kNJ2va3ZyovLL6Wa5ZqGldlEE8Hh3qApd2IR1VC2DuoYWh4lCgR7fJ+KTiBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ef29ca26c5795981684c3aa8e171d225dd96cb49737bfc89ec22233b1957e086","last_reissued_at":"2026-05-20T00:06:07.208070Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:06:07.208070Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Monitoring the Internal Monologue: Probe Trajectories Reveal Reasoning Dynamics","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CR"],"primary_cat":"cs.CL","authors_text":"Aleksander Szymczyk, Maciej Chrab\\k{a}szcz, Marcin Sendera, Sebastian Cygert, Tomasz Trzci\\'nski","submitted_at":"2026-05-18T15:29:04Z","abstract_excerpt":"Large Reasoning Models (LRMs) introduce new opportunities for safety monitoring through their Chain of Thought (CoT) reasoning. However, CoT is not always faithful to the model's final output, undermining its reliability as a monitoring tool. To address this, we investigate the hidden representations of LRMs to determine whether future behavior can be predicted from prompt and CoT representations. By evaluating a probe at each generated token, we construct a probe trajectory, the continuous evolution of a concept's probability across the reasoning process. We find that future model behavior is"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18549","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18549/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18549","created_at":"2026-05-20T00:06:07.208222+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18549v1","created_at":"2026-05-20T00:06:07.208222+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18549","created_at":"2026-05-20T00:06:07.208222+00:00"},{"alias_kind":"pith_short_12","alias_value":"54U4UJWFPFMY","created_at":"2026-05-20T00:06:07.208222+00:00"},{"alias_kind":"pith_short_16","alias_value":"54U4UJWFPFMYC2CM","created_at":"2026-05-20T00:06:07.208222+00:00"},{"alias_kind":"pith_short_8","alias_value":"54U4UJWF","created_at":"2026-05-20T00:06:07.208222+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX","json":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX.json","graph_json":"https://pith.science/api/pith-number/54U4UJWFPFMYC2CMHKUOC4OSEX/graph.json","events_json":"https://pith.science/api/pith-number/54U4UJWFPFMYC2CMHKUOC4OSEX/events.json","paper":"https://pith.science/paper/54U4UJWF"},"agent_actions":{"view_html":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX","download_json":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX.json","view_paper":"https://pith.science/paper/54U4UJWF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18549&json=true","fetch_graph":"https://pith.science/api/pith-number/54U4UJWFPFMYC2CMHKUOC4OSEX/graph.json","fetch_events":"https://pith.science/api/pith-number/54U4UJWFPFMYC2CMHKUOC4OSEX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX/action/storage_attestation","attest_author":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX/action/author_attestation","sign_citation":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX/action/citation_signature","submit_replication":"https://pith.science/pith/54U4UJWFPFMYC2CMHKUOC4OSEX/action/replication_record"}},"created_at":"2026-05-20T00:06:07.208222+00:00","updated_at":"2026-05-20T00:06:07.208222+00:00"}