{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:75CFN56VKNNFRHQW7MUCIDYYTA","short_pith_number":"pith:75CFN56V","schema_version":"1.0","canonical_sha256":"ff4456f7d5535a589e16fb28240f189834e6189f1d7117c70495d132f83746a6","source":{"kind":"arxiv","id":"2508.16420","version":3},"attestation_state":"computed","paper":{"title":"Hybrid Sequence Modeling and Reinforced Verification for Controllable Target-Conditioned Decision Making","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chao Gao, Haogang Zhu, Hao Sheng, Hongming Zhang, Liang Lin, Martin M\\\"uller, Mengxiao Zhu, Yingying Zhang, Yue Pei, Ziliang Chen","submitted_at":"2025-08-22T14:30:53Z","abstract_excerpt":"Target-conditioned sequence models provide a simple interface for controllable offline decision making, but the requested target return can be an unreliable control signal, especially when the target return lies in underrepresented regions of the dataset. This paper proposes Doctor, a hybrid sequence modeling and reinforced verification framework for controllable target-conditioned offline decision making. Doctor trains a shared masked trajectory Transformer with two complementary objectives: masked trajectory reconstruction for candidate generation and in-sample value learning for action-valu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2508.16420","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-08-22T14:30:53Z","cross_cats_sorted":[],"title_canon_sha256":"8673f9c0fefa351ccf45f27accbb1d2a771f917f8a191c1d7db75a9e7e08c612","abstract_canon_sha256":"8e07a7d3a127db444fd8ec2043f278483dca7bf5f8adc4e338e705880fe555b7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:14:58.113860Z","signature_b64":"Tdq9D1Fv5YNmZwgYH9Cxd+4wWTBpw6Mb8iIlCSb/cY0Gv53QiHPKZDgzc2lr/eITwVSZIFUrPnQg1ui0l3prCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ff4456f7d5535a589e16fb28240f189834e6189f1d7117c70495d132f83746a6","last_reissued_at":"2026-06-24T01:14:58.113350Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:14:58.113350Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Hybrid Sequence Modeling and Reinforced Verification for Controllable Target-Conditioned Decision Making","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chao Gao, Haogang Zhu, Hao Sheng, Hongming Zhang, Liang Lin, Martin M\\\"uller, Mengxiao Zhu, Yingying Zhang, Yue Pei, Ziliang Chen","submitted_at":"2025-08-22T14:30:53Z","abstract_excerpt":"Target-conditioned sequence models provide a simple interface for controllable offline decision making, but the requested target return can be an unreliable control signal, especially when the target return lies in underrepresented regions of the dataset. This paper proposes Doctor, a hybrid sequence modeling and reinforced verification framework for controllable target-conditioned offline decision making. Doctor trains a shared masked trajectory Transformer with two complementary objectives: masked trajectory reconstruction for candidate generation and in-sample value learning for action-valu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.16420","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.16420/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2508.16420","created_at":"2026-06-24T01:14:58.113412+00:00"},{"alias_kind":"arxiv_version","alias_value":"2508.16420v3","created_at":"2026-06-24T01:14:58.113412+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.16420","created_at":"2026-06-24T01:14:58.113412+00:00"},{"alias_kind":"pith_short_12","alias_value":"75CFN56VKNNF","created_at":"2026-06-24T01:14:58.113412+00:00"},{"alias_kind":"pith_short_16","alias_value":"75CFN56VKNNFRHQW","created_at":"2026-06-24T01:14:58.113412+00:00"},{"alias_kind":"pith_short_8","alias_value":"75CFN56V","created_at":"2026-06-24T01:14:58.113412+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA","json":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA.json","graph_json":"https://pith.science/api/pith-number/75CFN56VKNNFRHQW7MUCIDYYTA/graph.json","events_json":"https://pith.science/api/pith-number/75CFN56VKNNFRHQW7MUCIDYYTA/events.json","paper":"https://pith.science/paper/75CFN56V"},"agent_actions":{"view_html":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA","download_json":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA.json","view_paper":"https://pith.science/paper/75CFN56V","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2508.16420&json=true","fetch_graph":"https://pith.science/api/pith-number/75CFN56VKNNFRHQW7MUCIDYYTA/graph.json","fetch_events":"https://pith.science/api/pith-number/75CFN56VKNNFRHQW7MUCIDYYTA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA/action/storage_attestation","attest_author":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA/action/author_attestation","sign_citation":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA/action/citation_signature","submit_replication":"https://pith.science/pith/75CFN56VKNNFRHQW7MUCIDYYTA/action/replication_record"}},"created_at":"2026-06-24T01:14:58.113412+00:00","updated_at":"2026-06-24T01:14:58.113412+00:00"}