{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZMZLOANBYGGA5G7GYCBXBAD5SP","short_pith_number":"pith:ZMZLOANB","schema_version":"1.0","canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","source":{"kind":"arxiv","id":"2606.03949","version":1},"attestation_state":"computed","paper":{"title":"Preference-Calibrated Human-in-the-Loop Reinforcement Learning for Robotic Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Bofang Jia, Chunhua Yang, Guangyao Liu, Keke Huang, Weihua Gui, Yinuo Qu, Yuquan Xue, Zeyi Liu, Ziwei Wang","submitted_at":"2026-06-02T17:38:25Z","abstract_excerpt":"Human-in-the-loop reinforcement learning (HIL-RL) improves sample efficiency in real-robot manipulation through online human intervention. However, successful trajectories may include suboptimal actions that deviate from the desired task-execution path and force human intervention. Existing HIL-RL methods typically apply the consistent credit assignment principle to all transitions, uniformly propagating discounted terminal rewards through suboptimal segments, ignoring the actual contribution of each transition to task success. This overestimates Q-values for critic learning and indirectly mis"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.03949","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","cross_cats_sorted":[],"title_canon_sha256":"8dfd9007dcbcf6654a650861eee69bcae486f82c045fd139bb396b97622f4132","abstract_canon_sha256":"9f4e1b70e0a3bf254e7bebfda03270aa1d7cb64cc025d023ea70d0e163d3ae3a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:06:07.755650Z","signature_b64":"VusKbMQu0A9PpZhJapLfd+qhgQE/1oZ4k/VmxiNig83ezyWP8jiOOXkO2gzw81j1mndVu1/w0cCVrOZIGbsDAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","last_reissued_at":"2026-06-03T02:06:07.755213Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:06:07.755213Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Preference-Calibrated Human-in-the-Loop Reinforcement Learning for Robotic Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Bofang Jia, Chunhua Yang, Guangyao Liu, Keke Huang, Weihua Gui, Yinuo Qu, Yuquan Xue, Zeyi Liu, Ziwei Wang","submitted_at":"2026-06-02T17:38:25Z","abstract_excerpt":"Human-in-the-loop reinforcement learning (HIL-RL) improves sample efficiency in real-robot manipulation through online human intervention. However, successful trajectories may include suboptimal actions that deviate from the desired task-execution path and force human intervention. Existing HIL-RL methods typically apply the consistent credit assignment principle to all transitions, uniformly propagating discounted terminal rewards through suboptimal segments, ignoring the actual contribution of each transition to task success. This overestimates Q-values for critic learning and indirectly mis"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03949","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03949/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.03949","created_at":"2026-06-03T02:06:07.755268+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.03949v1","created_at":"2026-06-03T02:06:07.755268+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03949","created_at":"2026-06-03T02:06:07.755268+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZMZLOANBYGGA","created_at":"2026-06-03T02:06:07.755268+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZMZLOANBYGGA5G7G","created_at":"2026-06-03T02:06:07.755268+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZMZLOANB","created_at":"2026-06-03T02:06:07.755268+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP","json":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP.json","graph_json":"https://pith.science/api/pith-number/ZMZLOANBYGGA5G7GYCBXBAD5SP/graph.json","events_json":"https://pith.science/api/pith-number/ZMZLOANBYGGA5G7GYCBXBAD5SP/events.json","paper":"https://pith.science/paper/ZMZLOANB"},"agent_actions":{"view_html":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP","download_json":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP.json","view_paper":"https://pith.science/paper/ZMZLOANB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.03949&json=true","fetch_graph":"https://pith.science/api/pith-number/ZMZLOANBYGGA5G7GYCBXBAD5SP/graph.json","fetch_events":"https://pith.science/api/pith-number/ZMZLOANBYGGA5G7GYCBXBAD5SP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/action/storage_attestation","attest_author":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/action/author_attestation","sign_citation":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/action/citation_signature","submit_replication":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/action/replication_record"}},"created_at":"2026-06-03T02:06:07.755268+00:00","updated_at":"2026-06-03T02:06:07.755268+00:00"}