{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ZMZLOANBYGGA5G7GYCBXBAD5SP","short_pith_number":"pith:ZMZLOANB","canonical_record":{"source":{"id":"2606.03949","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","cross_cats_sorted":[],"title_canon_sha256":"8dfd9007dcbcf6654a650861eee69bcae486f82c045fd139bb396b97622f4132","abstract_canon_sha256":"9f4e1b70e0a3bf254e7bebfda03270aa1d7cb64cc025d023ea70d0e163d3ae3a"},"schema_version":"1.0"},"canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","source":{"kind":"arxiv","id":"2606.03949","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03949","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03949v1","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03949","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_12","alias_value":"ZMZLOANBYGGA","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZMZLOANBYGGA5G7G","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZMZLOANB","created_at":"2026-06-03T02:06:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ZMZLOANBYGGA5G7GYCBXBAD5SP","target":"record","payload":{"canonical_record":{"source":{"id":"2606.03949","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","cross_cats_sorted":[],"title_canon_sha256":"8dfd9007dcbcf6654a650861eee69bcae486f82c045fd139bb396b97622f4132","abstract_canon_sha256":"9f4e1b70e0a3bf254e7bebfda03270aa1d7cb64cc025d023ea70d0e163d3ae3a"},"schema_version":"1.0"},"canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:06:07.755650Z","signature_b64":"VusKbMQu0A9PpZhJapLfd+qhgQE/1oZ4k/VmxiNig83ezyWP8jiOOXkO2gzw81j1mndVu1/w0cCVrOZIGbsDAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","last_reissued_at":"2026-06-03T02:06:07.755213Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:06:07.755213Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.03949","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T02:06:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"X/SDblb3QC0PWO2GW1omiCVvjsaPKuOGHLQC6HVb/zAR8krQ+f2O4GuBiVrtKVQvbCVy8TeAdBJe680D4Nr3AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T19:04:10.746675Z"},"content_sha256":"0ef60267ddc307b6ce43f71b6e5eff4ceb0136f1b29b6b8d4db4907228aa9d36","schema_version":"1.0","event_id":"sha256:0ef60267ddc307b6ce43f71b6e5eff4ceb0136f1b29b6b8d4db4907228aa9d36"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ZMZLOANBYGGA5G7GYCBXBAD5SP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Preference-Calibrated Human-in-the-Loop Reinforcement Learning for Robotic Manipulation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Bofang Jia, Chunhua Yang, Guangyao Liu, Keke Huang, Weihua Gui, Yinuo Qu, Yuquan Xue, Zeyi Liu, Ziwei Wang","submitted_at":"2026-06-02T17:38:25Z","abstract_excerpt":"Human-in-the-loop reinforcement learning (HIL-RL) improves sample efficiency in real-robot manipulation through online human intervention. However, successful trajectories may include suboptimal actions that deviate from the desired task-execution path and force human intervention. Existing HIL-RL methods typically apply the consistent credit assignment principle to all transitions, uniformly propagating discounted terminal rewards through suboptimal segments, ignoring the actual contribution of each transition to task success. This overestimates Q-values for critic learning and indirectly mis"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03949","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03949/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T02:06:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ttNqxfEgx8azHgNPaLPVLNqOJfsmfArM2U8hZjNjjNg/rB5OlecwKV+3gasThzkVcRIboWm1oa2dbZ+dNOw2Cw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T19:04:10.747380Z"},"content_sha256":"afb67767585bbf219cabade8aeb40af22db7e81c3ec703b119df6bf7a3357872","schema_version":"1.0","event_id":"sha256:afb67767585bbf219cabade8aeb40af22db7e81c3ec703b119df6bf7a3357872"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/bundle.json","state_url":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T19:04:10Z","links":{"resolver":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP","bundle":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/bundle.json","state":"https://pith.science/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZMZLOANBYGGA5G7GYCBXBAD5SP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZMZLOANBYGGA5G7GYCBXBAD5SP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9f4e1b70e0a3bf254e7bebfda03270aa1d7cb64cc025d023ea70d0e163d3ae3a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","title_canon_sha256":"8dfd9007dcbcf6654a650861eee69bcae486f82c045fd139bb396b97622f4132"},"schema_version":"1.0","source":{"id":"2606.03949","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03949","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03949v1","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03949","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_12","alias_value":"ZMZLOANBYGGA","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_16","alias_value":"ZMZLOANBYGGA5G7G","created_at":"2026-06-03T02:06:07Z"},{"alias_kind":"pith_short_8","alias_value":"ZMZLOANB","created_at":"2026-06-03T02:06:07Z"}],"graph_snapshots":[{"event_id":"sha256:afb67767585bbf219cabade8aeb40af22db7e81c3ec703b119df6bf7a3357872","target":"graph","created_at":"2026-06-03T02:06:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.03949/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Human-in-the-loop reinforcement learning (HIL-RL) improves sample efficiency in real-robot manipulation through online human intervention. However, successful trajectories may include suboptimal actions that deviate from the desired task-execution path and force human intervention. Existing HIL-RL methods typically apply the consistent credit assignment principle to all transitions, uniformly propagating discounted terminal rewards through suboptimal segments, ignoring the actual contribution of each transition to task success. This overestimates Q-values for critic learning and indirectly mis","authors_text":"Bofang Jia, Chunhua Yang, Guangyao Liu, Keke Huang, Weihua Gui, Yinuo Qu, Yuquan Xue, Zeyi Liu, Ziwei Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","title":"Preference-Calibrated Human-in-the-Loop Reinforcement Learning for Robotic Manipulation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03949","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0ef60267ddc307b6ce43f71b6e5eff4ceb0136f1b29b6b8d4db4907228aa9d36","target":"record","created_at":"2026-06-03T02:06:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9f4e1b70e0a3bf254e7bebfda03270aa1d7cb64cc025d023ea70d0e163d3ae3a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2026-06-02T17:38:25Z","title_canon_sha256":"8dfd9007dcbcf6654a650861eee69bcae486f82c045fd139bb396b97622f4132"},"schema_version":"1.0","source":{"id":"2606.03949","kind":"arxiv","version":1}},"canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cb32b701a1c18c0e9be6c08370807d93e306239423145c840c101865e1b831a6","first_computed_at":"2026-06-03T02:06:07.755213Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T02:06:07.755213Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VusKbMQu0A9PpZhJapLfd+qhgQE/1oZ4k/VmxiNig83ezyWP8jiOOXkO2gzw81j1mndVu1/w0cCVrOZIGbsDAg==","signature_status":"signed_v1","signed_at":"2026-06-03T02:06:07.755650Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.03949","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0ef60267ddc307b6ce43f71b6e5eff4ceb0136f1b29b6b8d4db4907228aa9d36","sha256:afb67767585bbf219cabade8aeb40af22db7e81c3ec703b119df6bf7a3357872"],"state_sha256":"febed1554393c80e87194a4af71b59f509ee6d73ceaff6d10af3a7c33197f2e0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Gx2LMlDuRJM/T5ZtVndIGv17aOem0YlyTQzEYKWGoj0To9DS5tGohIOZHlnXZ8KC9zlnZPGiZjW4OLk6iwqQCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T19:04:10.751665Z","bundle_sha256":"207f16c695742ea743be71befeea0109b696a9516d335efcdd19b090fa11ca0f"}}