{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2DXK36HLKUU4CD6WKA5V52N4SA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"42f4b854a37ef1f40ea149252e955c64bdfc3443f2100f05b8f40a30684c949e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T04:29:30Z","title_canon_sha256":"a1885d735d6ce2e25c9d3003fea4f2eac17a5365fa20aa7b0dca9e40cea11985"},"schema_version":"1.0","source":{"id":"2606.01672","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.01672","created_at":"2026-06-02T02:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2606.01672v1","created_at":"2026-06-02T02:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.01672","created_at":"2026-06-02T02:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"2DXK36HLKUU4","created_at":"2026-06-02T02:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"2DXK36HLKUU4CD6W","created_at":"2026-06-02T02:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"2DXK36HL","created_at":"2026-06-02T02:04:39Z"}],"graph_snapshots":[{"event_id":"sha256:abf880dcd0bf8890848a21f5093867e3e42a72530496c6cc1542a441390d9456","target":"graph","created_at":"2026-06-02T02:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.01672/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning has enabled the acquisition of impressive robotic skills, but typically requires hand-crafted reward functions that are slow to design and difficult to align with human intentions. Recent work, such as Eureka, automates reward design by using an LLM to iteratively generate and refine reward code from task descriptions. However, they rely on coarse feedback signals such as success rate, which provide little semantic insight into the learned behavior. As a result, their trained policies achieve the final goal but are frequently poorly aligned with task instructions. We int","authors_text":"Ajay Subramanian, Ben Abbatematteo, Hojoon Lee, Karl Ridgeway, Nitin Kamra, Pedro Matias, Vijay Veerabadran","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T04:29:30Z","title":"RDA: Reward Design Agent for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.01672","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:97133f82d7ca87ad2474bf94799fc7548e914f1af83b117c93fec4f15bd89fcb","target":"record","created_at":"2026-06-02T02:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"42f4b854a37ef1f40ea149252e955c64bdfc3443f2100f05b8f40a30684c949e","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-01T04:29:30Z","title_canon_sha256":"a1885d735d6ce2e25c9d3003fea4f2eac17a5365fa20aa7b0dca9e40cea11985"},"schema_version":"1.0","source":{"id":"2606.01672","kind":"arxiv","version":1}},"canonical_sha256":"d0eeadf8eb5529c10fd6503b5ee9bc90236fd5a3761082dd38998da44660401a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d0eeadf8eb5529c10fd6503b5ee9bc90236fd5a3761082dd38998da44660401a","first_computed_at":"2026-06-02T02:04:39.737669Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:04:39.737669Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EvAPXhSSevYvGgk0AhJSa2gs8sQf+UXdM6zRo/BCaXR8INZOWICQbK2gQMeK9rS+/tChVtFoa+tHX7hUsuH1CA==","signature_status":"signed_v1","signed_at":"2026-06-02T02:04:39.738061Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.01672","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:97133f82d7ca87ad2474bf94799fc7548e914f1af83b117c93fec4f15bd89fcb","sha256:abf880dcd0bf8890848a21f5093867e3e42a72530496c6cc1542a441390d9456"],"state_sha256":"6913334f981551e035522003eacc1089781c1b2efeaeca94dc76f9edf5a3c66f"}