{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DHAIKAOSMTSMYMRGOGZ64SWFJL","short_pith_number":"pith:DHAIKAOS","canonical_record":{"source":{"id":"2606.27180","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T15:45:57Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"f0ad7658d4532fba158efd5f56ddfd28867867499af9b06bc51f26886eec4ec1","abstract_canon_sha256":"f75bd780076d8013067e3f5b263d5d6ca5f24b9948af99b6eb1aeed1b1d769f9"},"schema_version":"1.0"},"canonical_sha256":"19c08501d264e4cc322671b3ee4ac54ac6d00de0f9d120f6266ae945d5afd876","source":{"kind":"arxiv","id":"2606.27180","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.27180","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"arxiv_version","alias_value":"2606.27180v1","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.27180","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_12","alias_value":"DHAIKAOSMTSM","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_16","alias_value":"DHAIKAOSMTSMYMRG","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_8","alias_value":"DHAIKAOS","created_at":"2026-06-26T01:16:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DHAIKAOSMTSMYMRGOGZ64SWFJL","target":"record","payload":{"canonical_record":{"source":{"id":"2606.27180","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T15:45:57Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"f0ad7658d4532fba158efd5f56ddfd28867867499af9b06bc51f26886eec4ec1","abstract_canon_sha256":"f75bd780076d8013067e3f5b263d5d6ca5f24b9948af99b6eb1aeed1b1d769f9"},"schema_version":"1.0"},"canonical_sha256":"19c08501d264e4cc322671b3ee4ac54ac6d00de0f9d120f6266ae945d5afd876","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T01:16:12.703598Z","signature_b64":"mReIg8wSRgmVyQPlS315Jz+G5qlkV1HARA5XDWGD/yw+n/EDfjl2Q9ancdj5i5YaQlOMN8nn9cTjs1JdTzMECA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"19c08501d264e4cc322671b3ee4ac54ac6d00de0f9d120f6266ae945d5afd876","last_reissued_at":"2026-06-26T01:16:12.703245Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T01:16:12.703245Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.27180","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:16:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1+EmcoXm0Xi0+tfTLCBSF7oLSw0vpaTyZP15qEZ9rbs96T/XkjJLmvamcTc1atVkVhgnOrpH54CR0LwGOoUKCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T10:48:54.104323Z"},"content_sha256":"d7b63516d9def3f0e711f69fce2a8e8f284a0b6b87355a0a842cff325b78ac5b","schema_version":"1.0","event_id":"sha256:d7b63516d9def3f0e711f69fce2a8e8f284a0b6b87355a0a842cff325b78ac5b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DHAIKAOSMTSMYMRGOGZ64SWFJL","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Automating Potential-based Reward Shaping with Vision Language Model Guidance","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Daniel Kudenko, Henrik M\\\"uller","submitted_at":"2026-06-25T15:45:57Z","abstract_excerpt":"Sparse rewards are inherently challenging for reinforcement learning agents as they lack intermediate feedback to guide exploration and to correctly attribute the sparse success rewards to relevant parts of the trajectory. Naive reward shaping can induce reward hacking, yielding policies that exploit auxiliary signals instead of solving the intended task. Potential-based reward shaping (PBRS) guarantees preservation of the optimal policy set, but requires the definition of a heuristic potential function over the state space. In this work, we introduce the VLM-guided PBRS framework VLM-PBRS tha"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.27180","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.27180/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-26T01:16:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"E4tIxITin+GHmi0Q762HxH0azUW5qlSt3TWq9q4aozzxHdl0TmLS2nhLZxrvv+bPgYAczbnw6U4VRgNdmHGsCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-01T10:48:54.104707Z"},"content_sha256":"7f9481c6bed2759dad0d9a9c76fbfeb15d7dbffc7365d9a35e67792bd95322ba","schema_version":"1.0","event_id":"sha256:7f9481c6bed2759dad0d9a9c76fbfeb15d7dbffc7365d9a35e67792bd95322ba"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/bundle.json","state_url":"https://pith.science/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-01T10:48:54Z","links":{"resolver":"https://pith.science/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL","bundle":"https://pith.science/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/bundle.json","state":"https://pith.science/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DHAIKAOSMTSMYMRGOGZ64SWFJL/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DHAIKAOSMTSMYMRGOGZ64SWFJL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f75bd780076d8013067e3f5b263d5d6ca5f24b9948af99b6eb1aeed1b1d769f9","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T15:45:57Z","title_canon_sha256":"f0ad7658d4532fba158efd5f56ddfd28867867499af9b06bc51f26886eec4ec1"},"schema_version":"1.0","source":{"id":"2606.27180","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.27180","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"arxiv_version","alias_value":"2606.27180v1","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.27180","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_12","alias_value":"DHAIKAOSMTSM","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_16","alias_value":"DHAIKAOSMTSMYMRG","created_at":"2026-06-26T01:16:12Z"},{"alias_kind":"pith_short_8","alias_value":"DHAIKAOS","created_at":"2026-06-26T01:16:12Z"}],"graph_snapshots":[{"event_id":"sha256:7f9481c6bed2759dad0d9a9c76fbfeb15d7dbffc7365d9a35e67792bd95322ba","target":"graph","created_at":"2026-06-26T01:16:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.27180/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Sparse rewards are inherently challenging for reinforcement learning agents as they lack intermediate feedback to guide exploration and to correctly attribute the sparse success rewards to relevant parts of the trajectory. Naive reward shaping can induce reward hacking, yielding policies that exploit auxiliary signals instead of solving the intended task. Potential-based reward shaping (PBRS) guarantees preservation of the optimal policy set, but requires the definition of a heuristic potential function over the state space. In this work, we introduce the VLM-guided PBRS framework VLM-PBRS tha","authors_text":"Daniel Kudenko, Henrik M\\\"uller","cross_cats":["cs.AI","cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T15:45:57Z","title":"Automating Potential-based Reward Shaping with Vision Language Model Guidance"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.27180","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d7b63516d9def3f0e711f69fce2a8e8f284a0b6b87355a0a842cff325b78ac5b","target":"record","created_at":"2026-06-26T01:16:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f75bd780076d8013067e3f5b263d5d6ca5f24b9948af99b6eb1aeed1b1d769f9","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-25T15:45:57Z","title_canon_sha256":"f0ad7658d4532fba158efd5f56ddfd28867867499af9b06bc51f26886eec4ec1"},"schema_version":"1.0","source":{"id":"2606.27180","kind":"arxiv","version":1}},"canonical_sha256":"19c08501d264e4cc322671b3ee4ac54ac6d00de0f9d120f6266ae945d5afd876","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"19c08501d264e4cc322671b3ee4ac54ac6d00de0f9d120f6266ae945d5afd876","first_computed_at":"2026-06-26T01:16:12.703245Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-26T01:16:12.703245Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mReIg8wSRgmVyQPlS315Jz+G5qlkV1HARA5XDWGD/yw+n/EDfjl2Q9ancdj5i5YaQlOMN8nn9cTjs1JdTzMECA==","signature_status":"signed_v1","signed_at":"2026-06-26T01:16:12.703598Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.27180","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d7b63516d9def3f0e711f69fce2a8e8f284a0b6b87355a0a842cff325b78ac5b","sha256:7f9481c6bed2759dad0d9a9c76fbfeb15d7dbffc7365d9a35e67792bd95322ba"],"state_sha256":"58783dbdfced4ed544141c7125f87f981978a93a7b219a5f3bba834adc2273b3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ATwEFln0Hg4CoM3jAb3ssNZdnLcOBN0d0uRXP/HmNUHmf+FFTmARExdpehh431f7UWQ7mdu/UCHH9c0101klDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-01T10:48:54.106912Z","bundle_sha256":"2922beb7be33a76bfaec0d73e045e71d2fdc0575f89b6eeeb6c08af13bcbe300"}}