{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:G66LDEMEIN2G2TD575YVSJO6RB","short_pith_number":"pith:G66LDEME","schema_version":"1.0","canonical_sha256":"37bcb1918443746d4c7dff715925de8865ca69072065554c57d48a8e11293e85","source":{"kind":"arxiv","id":"2606.10979","version":1},"attestation_state":"computed","paper":{"title":"Bellman-Taylor Score Decoding for Markov Decision Processes with State-Dependent Feasible Action Sets","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Dongyan (Lucy) Huo, Qiang Chen, Rushuai Yang, Yi Chen","submitted_at":"2026-06-09T15:15:21Z","abstract_excerpt":"Many Markov decision processes (MDPs) in operations research have feasible actions that are state dependent and defined implicitly by various operational constraints. These features make it difficult to use standard deep reinforcement learning (DRL) algorithms, whose action interfaces typically assume either a fixed finite action catalog or a simple Euclidean space. Motivated by a Taylor expansion of the optimal action-value function, we propose Bellman--Taylor score decoding, a framework that moves policy learning to a Euclidean score space while enforcing feasibility through an action decode"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.10979","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-09T15:15:21Z","cross_cats_sorted":[],"title_canon_sha256":"27c529adbd61ec523f35d079eebbb7fbb702ef5a8e121ba6eccfc141edc98d47","abstract_canon_sha256":"2db6bf7a50635d041d8f4c3eeb8b3401473a90e3dafbe6008f18e391422cb546"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:10:51.045608Z","signature_b64":"z1h2YSOs6JsiQFeDFftXW5useJEPt3/jkH4ubUpzcxy+8padyRE+8HFPWtjw0/jrBD9QfVixbnjbBe9XSc7XDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"37bcb1918443746d4c7dff715925de8865ca69072065554c57d48a8e11293e85","last_reissued_at":"2026-06-10T01:10:51.044782Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:10:51.044782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bellman-Taylor Score Decoding for Markov Decision Processes with State-Dependent Feasible Action Sets","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Dongyan (Lucy) Huo, Qiang Chen, Rushuai Yang, Yi Chen","submitted_at":"2026-06-09T15:15:21Z","abstract_excerpt":"Many Markov decision processes (MDPs) in operations research have feasible actions that are state dependent and defined implicitly by various operational constraints. These features make it difficult to use standard deep reinforcement learning (DRL) algorithms, whose action interfaces typically assume either a fixed finite action catalog or a simple Euclidean space. Motivated by a Taylor expansion of the optimal action-value function, we propose Bellman--Taylor score decoding, a framework that moves policy learning to a Euclidean score space while enforcing feasibility through an action decode"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.10979","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.10979/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.10979","created_at":"2026-06-10T01:10:51.044909+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.10979v1","created_at":"2026-06-10T01:10:51.044909+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.10979","created_at":"2026-06-10T01:10:51.044909+00:00"},{"alias_kind":"pith_short_12","alias_value":"G66LDEMEIN2G","created_at":"2026-06-10T01:10:51.044909+00:00"},{"alias_kind":"pith_short_16","alias_value":"G66LDEMEIN2G2TD5","created_at":"2026-06-10T01:10:51.044909+00:00"},{"alias_kind":"pith_short_8","alias_value":"G66LDEME","created_at":"2026-06-10T01:10:51.044909+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB","json":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB.json","graph_json":"https://pith.science/api/pith-number/G66LDEMEIN2G2TD575YVSJO6RB/graph.json","events_json":"https://pith.science/api/pith-number/G66LDEMEIN2G2TD575YVSJO6RB/events.json","paper":"https://pith.science/paper/G66LDEME"},"agent_actions":{"view_html":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB","download_json":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB.json","view_paper":"https://pith.science/paper/G66LDEME","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.10979&json=true","fetch_graph":"https://pith.science/api/pith-number/G66LDEMEIN2G2TD575YVSJO6RB/graph.json","fetch_events":"https://pith.science/api/pith-number/G66LDEMEIN2G2TD575YVSJO6RB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB/action/storage_attestation","attest_author":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB/action/author_attestation","sign_citation":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB/action/citation_signature","submit_replication":"https://pith.science/pith/G66LDEMEIN2G2TD575YVSJO6RB/action/replication_record"}},"created_at":"2026-06-10T01:10:51.044909+00:00","updated_at":"2026-06-10T01:10:51.044909+00:00"}