{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:EZ7YUGZS65UKWYNX5X6OVA6N45","short_pith_number":"pith:EZ7YUGZS","canonical_record":{"source":{"id":"2409.02428","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-04T04:15:14Z","cross_cats_sorted":["cs.AI","cs.CL","cs.SY","eess.SY"],"title_canon_sha256":"c74ff9c93c0b5dd750270199e987adcbdf59908c88dd9d6cfb20998e907507c1","abstract_canon_sha256":"a41e60cd3904a2f25b278d593eb1747785cf9e6b7c314a9b717c154d79f60493"},"schema_version":"1.0"},"canonical_sha256":"267f8a1b32f768ab61b7edfcea83cde746b2d3ecc4d4446124d09b4e5550559d","source":{"kind":"arxiv","id":"2409.02428","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.02428","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"arxiv_version","alias_value":"2409.02428v4","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.02428","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_12","alias_value":"EZ7YUGZS65UK","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_16","alias_value":"EZ7YUGZS65UKWYNX","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_8","alias_value":"EZ7YUGZS","created_at":"2026-05-20T00:02:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:EZ7YUGZS65UKWYNX5X6OVA6N45","target":"record","payload":{"canonical_record":{"source":{"id":"2409.02428","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-04T04:15:14Z","cross_cats_sorted":["cs.AI","cs.CL","cs.SY","eess.SY"],"title_canon_sha256":"c74ff9c93c0b5dd750270199e987adcbdf59908c88dd9d6cfb20998e907507c1","abstract_canon_sha256":"a41e60cd3904a2f25b278d593eb1747785cf9e6b7c314a9b717c154d79f60493"},"schema_version":"1.0"},"canonical_sha256":"267f8a1b32f768ab61b7edfcea83cde746b2d3ecc4d4446124d09b4e5550559d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:46.781962Z","signature_b64":"z3ReF+HDhMIom5ZnNXhzW9Yl2Sg6KEAdRkXmFLQvnh7AmnoOTpwwGIkm0YQ+CfkKE180J7XGMn4AzDZ43AckBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"267f8a1b32f768ab61b7edfcea83cde746b2d3ecc4d4446124d09b4e5550559d","last_reissued_at":"2026-05-20T00:02:46.781413Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:46.781413Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2409.02428","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"rxjXN/y5Ak5Wac80kqQG5uurygectaWtOs1gNYJrNnqSXt1g9cgx18cdJlBwRIdxLVRRr8Z8rwKvphpJkl9aBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:30:30.506220Z"},"content_sha256":"72a7b3fcb0eb060009cbf2d062db9f7835bfb8f31c476e76cfd83a496fc68587","schema_version":"1.0","event_id":"sha256:72a7b3fcb0eb060009cbf2d062db9f7835bfb8f31c476e76cfd83a496fc68587"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:EZ7YUGZS65UKWYNX5X6OVA6N45","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Language Models as Efficient Reward Function Searchers for Custom-Environment Multi-Objective Reinforcement","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.SY","eess.SY"],"primary_cat":"cs.LG","authors_text":"Guanwen Xie, Jingzehua Xu, Shuai Zhang, Yimian Ding, Yiyuan Yang","submitted_at":"2024-09-04T04:15:14Z","abstract_excerpt":"Achieving the effective design and improvement of reward functions in reinforcement learning (RL) tasks with complex custom environments and multiple requirements presents considerable challenges. In this paper, we propose ERFSL, an efficient reward function searcher using LLMs, which enables LLMs to be effective white-box searchers and highlights their advanced semantic understanding capabilities. Specifically, we generate reward components for each numerically explicit user requirement and employ a reward critic to identify the correct code form. Then, LLMs assign weights to the reward compo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.02428","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2409.02428/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:02:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DBDaKjDEAsTmDz5MyeX9xwAQbGdChCZtUQMmuzO3ZGQB15xWpaPH4Jf3qL11zFShtFrhzyKTjJh9iTFPlFk1Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T18:30:30.506782Z"},"content_sha256":"77b4d5432897c476361d5af0365a5f1957bf157a94178ca6479d6b4998bfde85","schema_version":"1.0","event_id":"sha256:77b4d5432897c476361d5af0365a5f1957bf157a94178ca6479d6b4998bfde85"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/bundle.json","state_url":"https://pith.science/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T18:30:30Z","links":{"resolver":"https://pith.science/pith/EZ7YUGZS65UKWYNX5X6OVA6N45","bundle":"https://pith.science/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/bundle.json","state":"https://pith.science/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EZ7YUGZS65UKWYNX5X6OVA6N45/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:EZ7YUGZS65UKWYNX5X6OVA6N45","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a41e60cd3904a2f25b278d593eb1747785cf9e6b7c314a9b717c154d79f60493","cross_cats_sorted":["cs.AI","cs.CL","cs.SY","eess.SY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-04T04:15:14Z","title_canon_sha256":"c74ff9c93c0b5dd750270199e987adcbdf59908c88dd9d6cfb20998e907507c1"},"schema_version":"1.0","source":{"id":"2409.02428","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.02428","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"arxiv_version","alias_value":"2409.02428v4","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.02428","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_12","alias_value":"EZ7YUGZS65UK","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_16","alias_value":"EZ7YUGZS65UKWYNX","created_at":"2026-05-20T00:02:46Z"},{"alias_kind":"pith_short_8","alias_value":"EZ7YUGZS","created_at":"2026-05-20T00:02:46Z"}],"graph_snapshots":[{"event_id":"sha256:77b4d5432897c476361d5af0365a5f1957bf157a94178ca6479d6b4998bfde85","target":"graph","created_at":"2026-05-20T00:02:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2409.02428/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Achieving the effective design and improvement of reward functions in reinforcement learning (RL) tasks with complex custom environments and multiple requirements presents considerable challenges. In this paper, we propose ERFSL, an efficient reward function searcher using LLMs, which enables LLMs to be effective white-box searchers and highlights their advanced semantic understanding capabilities. Specifically, we generate reward components for each numerically explicit user requirement and employ a reward critic to identify the correct code form. Then, LLMs assign weights to the reward compo","authors_text":"Guanwen Xie, Jingzehua Xu, Shuai Zhang, Yimian Ding, Yiyuan Yang","cross_cats":["cs.AI","cs.CL","cs.SY","eess.SY"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-04T04:15:14Z","title":"Language Models as Efficient Reward Function Searchers for Custom-Environment Multi-Objective Reinforcement"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.02428","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:72a7b3fcb0eb060009cbf2d062db9f7835bfb8f31c476e76cfd83a496fc68587","target":"record","created_at":"2026-05-20T00:02:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a41e60cd3904a2f25b278d593eb1747785cf9e6b7c314a9b717c154d79f60493","cross_cats_sorted":["cs.AI","cs.CL","cs.SY","eess.SY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-09-04T04:15:14Z","title_canon_sha256":"c74ff9c93c0b5dd750270199e987adcbdf59908c88dd9d6cfb20998e907507c1"},"schema_version":"1.0","source":{"id":"2409.02428","kind":"arxiv","version":4}},"canonical_sha256":"267f8a1b32f768ab61b7edfcea83cde746b2d3ecc4d4446124d09b4e5550559d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"267f8a1b32f768ab61b7edfcea83cde746b2d3ecc4d4446124d09b4e5550559d","first_computed_at":"2026-05-20T00:02:46.781413Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:02:46.781413Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"z3ReF+HDhMIom5ZnNXhzW9Yl2Sg6KEAdRkXmFLQvnh7AmnoOTpwwGIkm0YQ+CfkKE180J7XGMn4AzDZ43AckBA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:02:46.781962Z","signed_message":"canonical_sha256_bytes"},"source_id":"2409.02428","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:72a7b3fcb0eb060009cbf2d062db9f7835bfb8f31c476e76cfd83a496fc68587","sha256:77b4d5432897c476361d5af0365a5f1957bf157a94178ca6479d6b4998bfde85"],"state_sha256":"f9369f042e578fd18abdd61d789377fcf629444b46ea1983b23193f6cf0039ce"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cbDvkPYAzWsAOaqKPj+N5L/T8PiKY/HZZrYtCf9TrqCMvelZ6glp2nk0IrtqYApZce0xXmvRX3Y8O3rpw6RzAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T18:30:30.509576Z","bundle_sha256":"bee859484c35a10963d46ded4be6cc21b540e1b00f60d28a56fb66a1ff2fb5a1"}}