{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:YYCX3NQZJXZEHCZP7OEPNCP4DU","short_pith_number":"pith:YYCX3NQZ","schema_version":"1.0","canonical_sha256":"c6057db6194df2438b2ffb88f689fc1d25edebb5a1186368bf083970b11f20a4","source":{"kind":"arxiv","id":"2507.11482","version":4},"attestation_state":"computed","paper":{"title":"Illuminating the Three Dogmas of Reinforcement Learning under Evolutionary Light","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mani Hamidi, Terrence W. Deacon","submitted_at":"2025-07-15T16:53:14Z","abstract_excerpt":"Artificial learning systems are graduating from passive learners to increasingly autonomous agents, lending pragmatic urgency to the question of what constitutes agency. Reinforcement learning (RL) offers arguably the most explicit formulation of agent-environment interaction, built on three core tenets: the environment as a Markov decision process, learning as policy optimization, and the agent as a maximizer of scalar reward. Recent work has called to revise these tenets: reconceptualizing learning as adaptation rather than optimization, broadening goals beyond scalar reward, and noting the "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2507.11482","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-15T16:53:14Z","cross_cats_sorted":[],"title_canon_sha256":"f0737e9a285a7958c1298271dd454248b1f78bce18cb9c676ddef4cf1e2c9e17","abstract_canon_sha256":"96169203e341b8cd10da75533ee2c5c43b3b22a53792130307880771b55763a7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:13:16.024834Z","signature_b64":"Pfuqen+oQ8yYCmcBEN0uaboKVksln176DDGWqegiJMAmQN6oTtjXGDK8IXSCkGY08avRuhVJr6wUDL4k2TJkAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c6057db6194df2438b2ffb88f689fc1d25edebb5a1186368bf083970b11f20a4","last_reissued_at":"2026-06-23T02:13:16.024359Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:13:16.024359Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Illuminating the Three Dogmas of Reinforcement Learning under Evolutionary Light","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mani Hamidi, Terrence W. Deacon","submitted_at":"2025-07-15T16:53:14Z","abstract_excerpt":"Artificial learning systems are graduating from passive learners to increasingly autonomous agents, lending pragmatic urgency to the question of what constitutes agency. Reinforcement learning (RL) offers arguably the most explicit formulation of agent-environment interaction, built on three core tenets: the environment as a Markov decision process, learning as policy optimization, and the agent as a maximizer of scalar reward. Recent work has called to revise these tenets: reconceptualizing learning as adaptation rather than optimization, broadening goals beyond scalar reward, and noting the "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.11482","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.11482/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2507.11482","created_at":"2026-06-23T02:13:16.024419+00:00"},{"alias_kind":"arxiv_version","alias_value":"2507.11482v4","created_at":"2026-06-23T02:13:16.024419+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.11482","created_at":"2026-06-23T02:13:16.024419+00:00"},{"alias_kind":"pith_short_12","alias_value":"YYCX3NQZJXZE","created_at":"2026-06-23T02:13:16.024419+00:00"},{"alias_kind":"pith_short_16","alias_value":"YYCX3NQZJXZEHCZP","created_at":"2026-06-23T02:13:16.024419+00:00"},{"alias_kind":"pith_short_8","alias_value":"YYCX3NQZ","created_at":"2026-06-23T02:13:16.024419+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU","json":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU.json","graph_json":"https://pith.science/api/pith-number/YYCX3NQZJXZEHCZP7OEPNCP4DU/graph.json","events_json":"https://pith.science/api/pith-number/YYCX3NQZJXZEHCZP7OEPNCP4DU/events.json","paper":"https://pith.science/paper/YYCX3NQZ"},"agent_actions":{"view_html":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU","download_json":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU.json","view_paper":"https://pith.science/paper/YYCX3NQZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2507.11482&json=true","fetch_graph":"https://pith.science/api/pith-number/YYCX3NQZJXZEHCZP7OEPNCP4DU/graph.json","fetch_events":"https://pith.science/api/pith-number/YYCX3NQZJXZEHCZP7OEPNCP4DU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU/action/storage_attestation","attest_author":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU/action/author_attestation","sign_citation":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU/action/citation_signature","submit_replication":"https://pith.science/pith/YYCX3NQZJXZEHCZP7OEPNCP4DU/action/replication_record"}},"created_at":"2026-06-23T02:13:16.024419+00:00","updated_at":"2026-06-23T02:13:16.024419+00:00"}