{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:FG7KS5VDRRNYDLP264EHIH4GWE","short_pith_number":"pith:FG7KS5VD","schema_version":"1.0","canonical_sha256":"29bea976a38c5b81adfaf708741f86b130d7fcdb7ca68bda40dc0a85819470cc","source":{"kind":"arxiv","id":"1904.10729","version":2},"attestation_state":"computed","paper":{"title":"Neural Logic Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Shan Luo, Zhengyao Jiang","submitted_at":"2019-04-24T10:24:35Z","abstract_excerpt":"Deep reinforcement learning (DRL) has achieved significant breakthroughs in various tasks. However, most DRL algorithms suffer a problem of generalizing the learned policy which makes the learning performance largely affected even by minor modifications of the training environment. Except that, the use of deep neural networks makes the learned policies hard to be interpretable. To address these two challenges, we propose a novel algorithm named Neural Logic Reinforcement Learning (NLRL) to represent the policies in reinforcement learning by first-order logic. NLRL is based on policy gradient m"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.10729","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-04-24T10:24:35Z","cross_cats_sorted":[],"title_canon_sha256":"638082926bd3627a0f46b236373bfe1c1a6b364a2ac3ebf4c758ea8e0c89c6c7","abstract_canon_sha256":"1abf819a5c87fc0db2628104989ae215d417b275ee281a3fa837c54e8e1962d4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:00.713530Z","signature_b64":"2PVhEykTLYAL2RKdnzg/DzP/F5TV6Z90EBjuvD1DYQvnnReABgep0GC9QKV93Iv34K/Fnsyqp4kEaKXzfKDjAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"29bea976a38c5b81adfaf708741f86b130d7fcdb7ca68bda40dc0a85819470cc","last_reissued_at":"2026-05-17T23:41:00.713021Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:00.713021Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Neural Logic Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Shan Luo, Zhengyao Jiang","submitted_at":"2019-04-24T10:24:35Z","abstract_excerpt":"Deep reinforcement learning (DRL) has achieved significant breakthroughs in various tasks. However, most DRL algorithms suffer a problem of generalizing the learned policy which makes the learning performance largely affected even by minor modifications of the training environment. Except that, the use of deep neural networks makes the learned policies hard to be interpretable. To address these two challenges, we propose a novel algorithm named Neural Logic Reinforcement Learning (NLRL) to represent the policies in reinforcement learning by first-order logic. NLRL is based on policy gradient m"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.10729","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.10729","created_at":"2026-05-17T23:41:00.713094+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.10729v2","created_at":"2026-05-17T23:41:00.713094+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.10729","created_at":"2026-05-17T23:41:00.713094+00:00"},{"alias_kind":"pith_short_12","alias_value":"FG7KS5VDRRNY","created_at":"2026-05-18T12:33:15.570797+00:00"},{"alias_kind":"pith_short_16","alias_value":"FG7KS5VDRRNYDLP2","created_at":"2026-05-18T12:33:15.570797+00:00"},{"alias_kind":"pith_short_8","alias_value":"FG7KS5VD","created_at":"2026-05-18T12:33:15.570797+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE","json":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE.json","graph_json":"https://pith.science/api/pith-number/FG7KS5VDRRNYDLP264EHIH4GWE/graph.json","events_json":"https://pith.science/api/pith-number/FG7KS5VDRRNYDLP264EHIH4GWE/events.json","paper":"https://pith.science/paper/FG7KS5VD"},"agent_actions":{"view_html":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE","download_json":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE.json","view_paper":"https://pith.science/paper/FG7KS5VD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.10729&json=true","fetch_graph":"https://pith.science/api/pith-number/FG7KS5VDRRNYDLP264EHIH4GWE/graph.json","fetch_events":"https://pith.science/api/pith-number/FG7KS5VDRRNYDLP264EHIH4GWE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE/action/storage_attestation","attest_author":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE/action/author_attestation","sign_citation":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE/action/citation_signature","submit_replication":"https://pith.science/pith/FG7KS5VDRRNYDLP264EHIH4GWE/action/replication_record"}},"created_at":"2026-05-17T23:41:00.713094+00:00","updated_at":"2026-05-17T23:41:00.713094+00:00"}