{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:VPQV2IURZRSSFH3RP3SW6OOH36","short_pith_number":"pith:VPQV2IUR","canonical_record":{"source":{"id":"2509.16136","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-09-19T16:35:27Z","cross_cats_sorted":[],"title_canon_sha256":"914219076ae6706fbc488047b329d23e7c755e4a51cbd4ff22badcad29cf721b","abstract_canon_sha256":"3ff935255af193135f7506e2a408f3df8ba4f39a63e2ef6df527a5172ea2dcb1"},"schema_version":"1.0"},"canonical_sha256":"abe15d2291cc65229f717ee56f39c7dfa5eaacc3b5807ebd75f8f776fa375f2e","source":{"kind":"arxiv","id":"2509.16136","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.16136","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"arxiv_version","alias_value":"2509.16136v5","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.16136","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_12","alias_value":"VPQV2IURZRSS","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_16","alias_value":"VPQV2IURZRSSFH3R","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_8","alias_value":"VPQV2IUR","created_at":"2026-06-09T01:05:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:VPQV2IURZRSSFH3RP3SW6OOH36","target":"record","payload":{"canonical_record":{"source":{"id":"2509.16136","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-09-19T16:35:27Z","cross_cats_sorted":[],"title_canon_sha256":"914219076ae6706fbc488047b329d23e7c755e4a51cbd4ff22badcad29cf721b","abstract_canon_sha256":"3ff935255af193135f7506e2a408f3df8ba4f39a63e2ef6df527a5172ea2dcb1"},"schema_version":"1.0"},"canonical_sha256":"abe15d2291cc65229f717ee56f39c7dfa5eaacc3b5807ebd75f8f776fa375f2e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T01:05:09.378786Z","signature_b64":"WBgsupW9MN7yt2v1lUJFXbZw/QiejKCf4SoQg0c1jwJ+w4LPbyVpGsi0txQqxr5NVnv8LNsgFXPqvSGtiCfZAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"abe15d2291cc65229f717ee56f39c7dfa5eaacc3b5807ebd75f8f776fa375f2e","last_reissued_at":"2026-06-09T01:05:09.378300Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T01:05:09.378300Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.16136","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KsIlmp87W2O3FEvdejFVFOhn2nD7KtOlIngym+U1RgzfNoyCwucaqxXS8z/ElJCmrS2VQwAzACrfKZMzAQufBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T21:50:37.273305Z"},"content_sha256":"3bb55e756d85ee4bde4dd54b7c077f040c2e1ff50b99a02ed21c950243fbf7d7","schema_version":"1.0","event_id":"sha256:3bb55e756d85ee4bde4dd54b7c077f040c2e1ff50b99a02ed21c950243fbf7d7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:VPQV2IURZRSSFH3RP3SW6OOH36","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reward Evolution with Graph-of-Thoughts: A Bi-Level Language Model Framework for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Changwei Yao, Chen Li, Marios Savvides, Xinzi Liu","submitted_at":"2025-09-19T16:35:27Z","abstract_excerpt":"Designing effective reward functions remains a major challenge in reinforcement learning (RL), often requiring considerable human expertise and iterative refinement. Recent advances leverage Large Language Models (LLMs) for automated reward design, but these approaches are limited by hallucinations, reliance on human feedback, and challenges with handling complex, multi-step tasks. In this work, we introduce Reward Evolution with Graph-of-Thoughts (RE-GoT), a novel bi-level framework that enhances LLMs with structured graph-based reasoning and integrates Visual Language Models (VLMs) for autom"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.16136","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.16136/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T01:05:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+Z9iHRhW22twBc9YiK09utbaj94mnCCbngocP51CznTec2VF7tsx+JI3rigThPLFjLjr4PKrQ2llWGPXBmNSCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-24T21:50:37.273707Z"},"content_sha256":"b33f3e7b92002407167f55d12b025ffca49a588bd8ad8f7dd1d7b6a1eedbf982","schema_version":"1.0","event_id":"sha256:b33f3e7b92002407167f55d12b025ffca49a588bd8ad8f7dd1d7b6a1eedbf982"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VPQV2IURZRSSFH3RP3SW6OOH36/bundle.json","state_url":"https://pith.science/pith/VPQV2IURZRSSFH3RP3SW6OOH36/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VPQV2IURZRSSFH3RP3SW6OOH36/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-24T21:50:37Z","links":{"resolver":"https://pith.science/pith/VPQV2IURZRSSFH3RP3SW6OOH36","bundle":"https://pith.science/pith/VPQV2IURZRSSFH3RP3SW6OOH36/bundle.json","state":"https://pith.science/pith/VPQV2IURZRSSFH3RP3SW6OOH36/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VPQV2IURZRSSFH3RP3SW6OOH36/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:VPQV2IURZRSSFH3RP3SW6OOH36","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3ff935255af193135f7506e2a408f3df8ba4f39a63e2ef6df527a5172ea2dcb1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-09-19T16:35:27Z","title_canon_sha256":"914219076ae6706fbc488047b329d23e7c755e4a51cbd4ff22badcad29cf721b"},"schema_version":"1.0","source":{"id":"2509.16136","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.16136","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"arxiv_version","alias_value":"2509.16136v5","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.16136","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_12","alias_value":"VPQV2IURZRSS","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_16","alias_value":"VPQV2IURZRSSFH3R","created_at":"2026-06-09T01:05:09Z"},{"alias_kind":"pith_short_8","alias_value":"VPQV2IUR","created_at":"2026-06-09T01:05:09Z"}],"graph_snapshots":[{"event_id":"sha256:b33f3e7b92002407167f55d12b025ffca49a588bd8ad8f7dd1d7b6a1eedbf982","target":"graph","created_at":"2026-06-09T01:05:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.16136/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Designing effective reward functions remains a major challenge in reinforcement learning (RL), often requiring considerable human expertise and iterative refinement. Recent advances leverage Large Language Models (LLMs) for automated reward design, but these approaches are limited by hallucinations, reliance on human feedback, and challenges with handling complex, multi-step tasks. In this work, we introduce Reward Evolution with Graph-of-Thoughts (RE-GoT), a novel bi-level framework that enhances LLMs with structured graph-based reasoning and integrates Visual Language Models (VLMs) for autom","authors_text":"Changwei Yao, Chen Li, Marios Savvides, Xinzi Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-09-19T16:35:27Z","title":"Reward Evolution with Graph-of-Thoughts: A Bi-Level Language Model Framework for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.16136","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3bb55e756d85ee4bde4dd54b7c077f040c2e1ff50b99a02ed21c950243fbf7d7","target":"record","created_at":"2026-06-09T01:05:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3ff935255af193135f7506e2a408f3df8ba4f39a63e2ef6df527a5172ea2dcb1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-09-19T16:35:27Z","title_canon_sha256":"914219076ae6706fbc488047b329d23e7c755e4a51cbd4ff22badcad29cf721b"},"schema_version":"1.0","source":{"id":"2509.16136","kind":"arxiv","version":5}},"canonical_sha256":"abe15d2291cc65229f717ee56f39c7dfa5eaacc3b5807ebd75f8f776fa375f2e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"abe15d2291cc65229f717ee56f39c7dfa5eaacc3b5807ebd75f8f776fa375f2e","first_computed_at":"2026-06-09T01:05:09.378300Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:05:09.378300Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WBgsupW9MN7yt2v1lUJFXbZw/QiejKCf4SoQg0c1jwJ+w4LPbyVpGsi0txQqxr5NVnv8LNsgFXPqvSGtiCfZAQ==","signature_status":"signed_v1","signed_at":"2026-06-09T01:05:09.378786Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.16136","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3bb55e756d85ee4bde4dd54b7c077f040c2e1ff50b99a02ed21c950243fbf7d7","sha256:b33f3e7b92002407167f55d12b025ffca49a588bd8ad8f7dd1d7b6a1eedbf982"],"state_sha256":"a9da438f2ad9c5deacb120ef4188eaa31e486b55ec88f9048b3a55be77f10017"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9UxIchg7kUi32+7oFnAPFNCfO367QX8NHzkmBFIqZn5e0WiLnqEFU7HDfGjIakXB5RA42Sr8gnIzA2PRasp9Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-24T21:50:37.275708Z","bundle_sha256":"3c512d9a793558fea5256ae759074b06f5b61ae11458b8ab7e3bd254af60d5c8"}}