{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:P7CN5XXUMSIXDFUI4MC24FR5YB","short_pith_number":"pith:P7CN5XXU","schema_version":"1.0","canonical_sha256":"7fc4dedef46491719688e305ae163dc079b5b0c6705e1d939c2ff8641a5a9d88","source":{"kind":"arxiv","id":"2310.18127","version":2},"attestation_state":"computed","paper":{"title":"Ask more, know better: Reinforce-Learned Prompt Questions for Decision Making with Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"David Henry Mguni, Filippos Christianos, Haifeng Zhang, Jun Wang, Xinyu Cui, Xue Yan, Yan Song","submitted_at":"2023-10-27T13:19:19Z","abstract_excerpt":"Large language models (LLMs) demonstrate their promise in tackling complicated practical challenges by combining action-based policies with chain of thought (CoT) reasoning. Having high-quality prompts on hand, however, is vital to the framework's effectiveness. Currently, these prompts are handcrafted utilising extensive human labor, resulting in CoT policies that frequently fail to generalise. Human intervention is also required to develop grounding functions that ensure low-level controllers appropriately process CoT reasoning. In this paper, we propose a comprehensive training framework fo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2310.18127","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-10-27T13:19:19Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"48f114a7b7871ad29e78499dbf0110dfcf335faafd50bc475f405cfb7d963b7f","abstract_canon_sha256":"9c55bc8ee156c44336787e5cdab26500f788d2d47779476c5e5aabce661fc7bf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T07:50:21.294685Z","signature_b64":"DUVYw14ZaaCxG2FN34dBHojs9zybsC/vbJQvGs46ASQZgzXZYv0RBNMEC6zf9iZ6X3D3jdX8pqsI8nXM3OYoBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7fc4dedef46491719688e305ae163dc079b5b0c6705e1d939c2ff8641a5a9d88","last_reissued_at":"2026-07-05T07:50:21.294194Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T07:50:21.294194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Ask more, know better: Reinforce-Learned Prompt Questions for Decision Making with Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"David Henry Mguni, Filippos Christianos, Haifeng Zhang, Jun Wang, Xinyu Cui, Xue Yan, Yan Song","submitted_at":"2023-10-27T13:19:19Z","abstract_excerpt":"Large language models (LLMs) demonstrate their promise in tackling complicated practical challenges by combining action-based policies with chain of thought (CoT) reasoning. Having high-quality prompts on hand, however, is vital to the framework's effectiveness. Currently, these prompts are handcrafted utilising extensive human labor, resulting in CoT policies that frequently fail to generalise. Human intervention is also required to develop grounding functions that ensure low-level controllers appropriately process CoT reasoning. In this paper, we propose a comprehensive training framework fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2310.18127","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2310.18127/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2310.18127","created_at":"2026-07-05T07:50:21.294251+00:00"},{"alias_kind":"arxiv_version","alias_value":"2310.18127v2","created_at":"2026-07-05T07:50:21.294251+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.18127","created_at":"2026-07-05T07:50:21.294251+00:00"},{"alias_kind":"pith_short_12","alias_value":"P7CN5XXUMSIX","created_at":"2026-07-05T07:50:21.294251+00:00"},{"alias_kind":"pith_short_16","alias_value":"P7CN5XXUMSIXDFUI","created_at":"2026-07-05T07:50:21.294251+00:00"},{"alias_kind":"pith_short_8","alias_value":"P7CN5XXU","created_at":"2026-07-05T07:50:21.294251+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2606.23668","citing_title":"On the Limits of Prompt-Conditioned Language Models as General-Purpose Learners","ref_index":14,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB","json":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB.json","graph_json":"https://pith.science/api/pith-number/P7CN5XXUMSIXDFUI4MC24FR5YB/graph.json","events_json":"https://pith.science/api/pith-number/P7CN5XXUMSIXDFUI4MC24FR5YB/events.json","paper":"https://pith.science/paper/P7CN5XXU"},"agent_actions":{"view_html":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB","download_json":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB.json","view_paper":"https://pith.science/paper/P7CN5XXU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2310.18127&json=true","fetch_graph":"https://pith.science/api/pith-number/P7CN5XXUMSIXDFUI4MC24FR5YB/graph.json","fetch_events":"https://pith.science/api/pith-number/P7CN5XXUMSIXDFUI4MC24FR5YB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB/action/storage_attestation","attest_author":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB/action/author_attestation","sign_citation":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB/action/citation_signature","submit_replication":"https://pith.science/pith/P7CN5XXUMSIXDFUI4MC24FR5YB/action/replication_record"}},"created_at":"2026-07-05T07:50:21.294251+00:00","updated_at":"2026-07-05T07:50:21.294251+00:00"}