{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:XVTQJF3DPSH4ZL3DWKNN2WV3EM","short_pith_number":"pith:XVTQJF3D","schema_version":"1.0","canonical_sha256":"bd670497637c8fccaf63b29add5abb2309ff4e0093405a07dd51f831c0e6a743","source":{"kind":"arxiv","id":"2501.19128","version":5},"attestation_state":"computed","paper":{"title":"Shaping Sparse Rewards in Reinforcement Learning: A Semi-supervised Approach","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chen Sun, Wenjie Huang, Wenyun Li","submitted_at":"2025-01-31T13:35:19Z","abstract_excerpt":"In many real-world scenarios, reward signal for agents are exceedingly sparse, making it challenging to learn an effective reward function for reward shaping. To address this issue, the proposed approach in this paper performs reward shaping not only by utilizing non-zero-reward transitions but also by employing the \\emph{Semi-Supervised Learning} (SSL) technique combined with a novel data augmentation to learn trajectory space representations from the majority of transitions, {i.e}., zero-reward transitions, thereby improving the efficacy of reward shaping. Experimental results in Atari and r"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2501.19128","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-01-31T13:35:19Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"e960e65fa750226502b5f00ad454f5561fcfc682da4629db2c87bcf995b426c8","abstract_canon_sha256":"4993b5860be35b7e4fa792e808ec8a17b494af55cc02e833e8fbf4ac97e0e531"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:17.590124Z","signature_b64":"CCHABbGvzvyQGFNh4LxrEcsMLoNbzZm92e2gnGWIZ2DVUkP71f2pw3OoMmxEVIWCTO6d+/KTwcusGp1O/DDuAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bd670497637c8fccaf63b29add5abb2309ff4e0093405a07dd51f831c0e6a743","last_reissued_at":"2026-05-20T00:00:17.589342Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:17.589342Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Shaping Sparse Rewards in Reinforcement Learning: A Semi-supervised Approach","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chen Sun, Wenjie Huang, Wenyun Li","submitted_at":"2025-01-31T13:35:19Z","abstract_excerpt":"In many real-world scenarios, reward signal for agents are exceedingly sparse, making it challenging to learn an effective reward function for reward shaping. To address this issue, the proposed approach in this paper performs reward shaping not only by utilizing non-zero-reward transitions but also by employing the \\emph{Semi-Supervised Learning} (SSL) technique combined with a novel data augmentation to learn trajectory space representations from the majority of transitions, {i.e}., zero-reward transitions, thereby improving the efficacy of reward shaping. Experimental results in Atari and r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.19128","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2501.19128/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2501.19128","created_at":"2026-05-20T00:00:17.589467+00:00"},{"alias_kind":"arxiv_version","alias_value":"2501.19128v5","created_at":"2026-05-20T00:00:17.589467+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.19128","created_at":"2026-05-20T00:00:17.589467+00:00"},{"alias_kind":"pith_short_12","alias_value":"XVTQJF3DPSH4","created_at":"2026-05-20T00:00:17.589467+00:00"},{"alias_kind":"pith_short_16","alias_value":"XVTQJF3DPSH4ZL3D","created_at":"2026-05-20T00:00:17.589467+00:00"},{"alias_kind":"pith_short_8","alias_value":"XVTQJF3D","created_at":"2026-05-20T00:00:17.589467+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM","json":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM.json","graph_json":"https://pith.science/api/pith-number/XVTQJF3DPSH4ZL3DWKNN2WV3EM/graph.json","events_json":"https://pith.science/api/pith-number/XVTQJF3DPSH4ZL3DWKNN2WV3EM/events.json","paper":"https://pith.science/paper/XVTQJF3D"},"agent_actions":{"view_html":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM","download_json":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM.json","view_paper":"https://pith.science/paper/XVTQJF3D","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2501.19128&json=true","fetch_graph":"https://pith.science/api/pith-number/XVTQJF3DPSH4ZL3DWKNN2WV3EM/graph.json","fetch_events":"https://pith.science/api/pith-number/XVTQJF3DPSH4ZL3DWKNN2WV3EM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM/action/storage_attestation","attest_author":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM/action/author_attestation","sign_citation":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM/action/citation_signature","submit_replication":"https://pith.science/pith/XVTQJF3DPSH4ZL3DWKNN2WV3EM/action/replication_record"}},"created_at":"2026-05-20T00:00:17.589467+00:00","updated_at":"2026-05-20T00:00:17.589467+00:00"}