{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:D3NK52THQR37ZPVEGCOVLX636W","short_pith_number":"pith:D3NK52TH","schema_version":"1.0","canonical_sha256":"1edaaeea678477fcbea4309d55dfdbf5b2d8b397499df85226f1bc44c8bd538f","source":{"kind":"arxiv","id":"2502.10906","version":2},"attestation_state":"computed","paper":{"title":"PCGRLLM: Large Language Model-Driven Reward Design for Procedural Content Generation Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"In-Chang Baek, Jin-Ha Noh, Julian Togelius, Kyung-Joong Kim, Sam Earle, Sung-Hyun Kim, Zehua Jiang","submitted_at":"2025-02-15T21:00:40Z","abstract_excerpt":"Reward design plays a pivotal role in the training of game AIs, requiring substantial domain-specific knowledge and human effort. In recent years, several studies have explored reward generation for training game agents and controlling robots using large language models (LLMs). In the content generation literature, there has been early work on generating reward functions for reinforcement learning agent generators. This work introduces PCGRLLM, an extended architecture based on earlier work, which employs a feedback mechanism and several reasoning-based prompt engineering techniques. We evalua"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2502.10906","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-02-15T21:00:40Z","cross_cats_sorted":[],"title_canon_sha256":"e01092e52dbcc39a4f61db90457dbcde4332d0f0efafd9b04e0d78d7df3f4bad","abstract_canon_sha256":"f9c1380bc511647875f3d73ba73b50cba708c67975b6910fbed91b50f35f7685"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:03:48.638706Z","signature_b64":"SgvjcJPcnuXPyCWokAH1EaymG/ma3v9Bv/lOYW/JOI6KDVJXxgRF2pgy+tclEkT//ZXc8zkBjpmmRnHViq4bCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1edaaeea678477fcbea4309d55dfdbf5b2d8b397499df85226f1bc44c8bd538f","last_reissued_at":"2026-05-26T02:03:48.637634Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:03:48.637634Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PCGRLLM: Large Language Model-Driven Reward Design for Procedural Content Generation Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"In-Chang Baek, Jin-Ha Noh, Julian Togelius, Kyung-Joong Kim, Sam Earle, Sung-Hyun Kim, Zehua Jiang","submitted_at":"2025-02-15T21:00:40Z","abstract_excerpt":"Reward design plays a pivotal role in the training of game AIs, requiring substantial domain-specific knowledge and human effort. In recent years, several studies have explored reward generation for training game agents and controlling robots using large language models (LLMs). In the content generation literature, there has been early work on generating reward functions for reinforcement learning agent generators. This work introduces PCGRLLM, an extended architecture based on earlier work, which employs a feedback mechanism and several reasoning-based prompt engineering techniques. We evalua"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.10906","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2502.10906/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2502.10906","created_at":"2026-05-26T02:03:48.637786+00:00"},{"alias_kind":"arxiv_version","alias_value":"2502.10906v2","created_at":"2026-05-26T02:03:48.637786+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.10906","created_at":"2026-05-26T02:03:48.637786+00:00"},{"alias_kind":"pith_short_12","alias_value":"D3NK52THQR37","created_at":"2026-05-26T02:03:48.637786+00:00"},{"alias_kind":"pith_short_16","alias_value":"D3NK52THQR37ZPVE","created_at":"2026-05-26T02:03:48.637786+00:00"},{"alias_kind":"pith_short_8","alias_value":"D3NK52TH","created_at":"2026-05-26T02:03:48.637786+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W","json":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W.json","graph_json":"https://pith.science/api/pith-number/D3NK52THQR37ZPVEGCOVLX636W/graph.json","events_json":"https://pith.science/api/pith-number/D3NK52THQR37ZPVEGCOVLX636W/events.json","paper":"https://pith.science/paper/D3NK52TH"},"agent_actions":{"view_html":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W","download_json":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W.json","view_paper":"https://pith.science/paper/D3NK52TH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2502.10906&json=true","fetch_graph":"https://pith.science/api/pith-number/D3NK52THQR37ZPVEGCOVLX636W/graph.json","fetch_events":"https://pith.science/api/pith-number/D3NK52THQR37ZPVEGCOVLX636W/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W/action/timestamp_anchor","attest_storage":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W/action/storage_attestation","attest_author":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W/action/author_attestation","sign_citation":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W/action/citation_signature","submit_replication":"https://pith.science/pith/D3NK52THQR37ZPVEGCOVLX636W/action/replication_record"}},"created_at":"2026-05-26T02:03:48.637786+00:00","updated_at":"2026-05-26T02:03:48.637786+00:00"}