{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:MZSWUP7SI5FMPZF72PM7OI3HPZ","short_pith_number":"pith:MZSWUP7S","schema_version":"1.0","canonical_sha256":"66656a3ff2474ac7e4bfd3d9f723677e50be32bf67b715f63f03b47105af837e","source":{"kind":"arxiv","id":"1809.11074","version":3},"attestation_state":"computed","paper":{"title":"Robot Representation and Reasoning with Knowledge from Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Keting Lu, Peter Stone, Shiqi Zhang, Xiaoping Chen","submitted_at":"2018-09-28T15:02:21Z","abstract_excerpt":"Reinforcement learning (RL) agents aim at learning by interacting with an environment, and are not designed for representing or reasoning with declarative knowledge. Knowledge representation and reasoning (KRR) paradigms are strong in declarative KRR tasks, but are ill-equipped to learn from such experiences. In this work, we integrate logical-probabilistic KRR with model-based RL, enabling agents to simultaneously reason with declarative knowledge and learn from interaction experiences. The knowledge from humans and RL is unified and used for dynamically computing task-specific planning model"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.11074","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-09-28T15:02:21Z","cross_cats_sorted":[],"title_canon_sha256":"448651c75a37b3f1f5fa6751a5ab423b092a7205008727bde0b517cac75901a7","abstract_canon_sha256":"a5b9ee16c2c3ea22f75de8b49767e6d42e476f7acab47bb09accd8f93c7a5b03"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:07.989839Z","signature_b64":"cOn6yjbipWtcHWDv4PLQTNlKFjosJYhlhwwCQeHsG/j/NzIYAZNR9WZ137uXUbB5xOjIEWbCkS6KACTD3OOOCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"66656a3ff2474ac7e4bfd3d9f723677e50be32bf67b715f63f03b47105af837e","last_reissued_at":"2026-05-18T00:00:07.989283Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:07.989283Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Robot Representation and Reasoning with Knowledge from Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Keting Lu, Peter Stone, Shiqi Zhang, Xiaoping Chen","submitted_at":"2018-09-28T15:02:21Z","abstract_excerpt":"Reinforcement learning (RL) agents aim at learning by interacting with an environment, and are not designed for representing or reasoning with declarative knowledge. Knowledge representation and reasoning (KRR) paradigms are strong in declarative KRR tasks, but are ill-equipped to learn from such experiences. In this work, we integrate logical-probabilistic KRR with model-based RL, enabling agents to simultaneously reason with declarative knowledge and learn from interaction experiences. The knowledge from humans and RL is unified and used for dynamically computing task-specific planning model"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.11074","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.11074","created_at":"2026-05-18T00:00:07.989389+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.11074v3","created_at":"2026-05-18T00:00:07.989389+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.11074","created_at":"2026-05-18T00:00:07.989389+00:00"},{"alias_kind":"pith_short_12","alias_value":"MZSWUP7SI5FM","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_16","alias_value":"MZSWUP7SI5FMPZF7","created_at":"2026-05-18T12:32:40.477152+00:00"},{"alias_kind":"pith_short_8","alias_value":"MZSWUP7S","created_at":"2026-05-18T12:32:40.477152+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ","json":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ.json","graph_json":"https://pith.science/api/pith-number/MZSWUP7SI5FMPZF72PM7OI3HPZ/graph.json","events_json":"https://pith.science/api/pith-number/MZSWUP7SI5FMPZF72PM7OI3HPZ/events.json","paper":"https://pith.science/paper/MZSWUP7S"},"agent_actions":{"view_html":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ","download_json":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ.json","view_paper":"https://pith.science/paper/MZSWUP7S","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.11074&json=true","fetch_graph":"https://pith.science/api/pith-number/MZSWUP7SI5FMPZF72PM7OI3HPZ/graph.json","fetch_events":"https://pith.science/api/pith-number/MZSWUP7SI5FMPZF72PM7OI3HPZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ/action/storage_attestation","attest_author":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ/action/author_attestation","sign_citation":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ/action/citation_signature","submit_replication":"https://pith.science/pith/MZSWUP7SI5FMPZF72PM7OI3HPZ/action/replication_record"}},"created_at":"2026-05-18T00:00:07.989389+00:00","updated_at":"2026-05-18T00:00:07.989389+00:00"}