{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:O5RC7P44HYHQ53A65V3UOPY35S","short_pith_number":"pith:O5RC7P44","canonical_record":{"source":{"id":"2601.21350","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:18:45Z","cross_cats_sorted":[],"title_canon_sha256":"ef9548512bf43c9ae4ec0250288b5e0ce0af5b51c132b44b8796f6e5137f513e","abstract_canon_sha256":"46a5454ef372d5b019bab64da7b7eec03e8f32b07237e0fc131e0d82ac4acece"},"schema_version":"1.0"},"canonical_sha256":"77622fbf9c3e0f0eec1eed77473f1becb02e07910b56f5366562868e037bb7d2","source":{"kind":"arxiv","id":"2601.21350","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.21350","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"arxiv_version","alias_value":"2601.21350v2","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.21350","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_12","alias_value":"O5RC7P44HYHQ","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_16","alias_value":"O5RC7P44HYHQ53A6","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_8","alias_value":"O5RC7P44","created_at":"2026-05-20T00:03:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:O5RC7P44HYHQ53A65V3UOPY35S","target":"record","payload":{"canonical_record":{"source":{"id":"2601.21350","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:18:45Z","cross_cats_sorted":[],"title_canon_sha256":"ef9548512bf43c9ae4ec0250288b5e0ce0af5b51c132b44b8796f6e5137f513e","abstract_canon_sha256":"46a5454ef372d5b019bab64da7b7eec03e8f32b07237e0fc131e0d82ac4acece"},"schema_version":"1.0"},"canonical_sha256":"77622fbf9c3e0f0eec1eed77473f1becb02e07910b56f5366562868e037bb7d2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:03.443087Z","signature_b64":"LZOjIH9TVsn/oOaJPN85YfJJLwRKsZVLFMwCZ94rIvIeWI01aMABFC7oIn43JXv/NNjYMxlEQh4SvRBquATfDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"77622fbf9c3e0f0eec1eed77473f1becb02e07910b56f5366562868e037bb7d2","last_reissued_at":"2026-05-20T00:03:03.442240Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:03.442240Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2601.21350","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4KnbW0IwYNRR5L9ZDtF+52gWzDRTLTEOloLI1CA1HebAjLOBL0c2+K2MP7oePonG6pagyIXpZ0qWqBO768bDDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:26:58.365719Z"},"content_sha256":"dc2d10e019616dcb155b34f46acaa96e86f92f391e99b1cf963b644a4eaaa53e","schema_version":"1.0","event_id":"sha256:dc2d10e019616dcb155b34f46acaa96e86f92f391e99b1cf963b644a4eaaa53e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:O5RC7P44HYHQ53A65V3UOPY35S","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Factored Causal Representation Learning for Robust Reward Modeling in RLHF","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Biwei Huang, Fan Feng, Lei Xu, Lin Qu, Lin Yang, Shikui Tu, Wanxi Deng, Yupei Yang","submitted_at":"2026-01-29T07:18:45Z","abstract_excerpt":"A reliable reward model is essential for aligning large language models with human preferences through reinforcement learning from human feedback. However, standard reward models are susceptible to spurious features that are not causally related to human labels. This can lead to reward hacking, where high predicted reward does not translate into better behavior. In this work, we address this problem from a causal perspective by proposing a factored representation learning framework that decomposes the model's contextual embedding into (1) causal factors that are sufficient for reward predictio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.21350","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.21350/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:03Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fEmub6HGThUsiobA67kLD15gR7hOw+mT/xXDogy7oRnqixD+puRP0iQVMqOlGck1a+iJqkFMSujkjnop1GI8BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:26:58.366561Z"},"content_sha256":"fe5700c0cf31c16eff573efb63a8089a24079f9899e2f5041f16d900b33a3963","schema_version":"1.0","event_id":"sha256:fe5700c0cf31c16eff573efb63a8089a24079f9899e2f5041f16d900b33a3963"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/O5RC7P44HYHQ53A65V3UOPY35S/bundle.json","state_url":"https://pith.science/pith/O5RC7P44HYHQ53A65V3UOPY35S/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/O5RC7P44HYHQ53A65V3UOPY35S/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T23:26:58Z","links":{"resolver":"https://pith.science/pith/O5RC7P44HYHQ53A65V3UOPY35S","bundle":"https://pith.science/pith/O5RC7P44HYHQ53A65V3UOPY35S/bundle.json","state":"https://pith.science/pith/O5RC7P44HYHQ53A65V3UOPY35S/state.json","well_known_bundle":"https://pith.science/.well-known/pith/O5RC7P44HYHQ53A65V3UOPY35S/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:O5RC7P44HYHQ53A65V3UOPY35S","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"46a5454ef372d5b019bab64da7b7eec03e8f32b07237e0fc131e0d82ac4acece","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:18:45Z","title_canon_sha256":"ef9548512bf43c9ae4ec0250288b5e0ce0af5b51c132b44b8796f6e5137f513e"},"schema_version":"1.0","source":{"id":"2601.21350","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.21350","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"arxiv_version","alias_value":"2601.21350v2","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.21350","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_12","alias_value":"O5RC7P44HYHQ","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_16","alias_value":"O5RC7P44HYHQ53A6","created_at":"2026-05-20T00:03:03Z"},{"alias_kind":"pith_short_8","alias_value":"O5RC7P44","created_at":"2026-05-20T00:03:03Z"}],"graph_snapshots":[{"event_id":"sha256:fe5700c0cf31c16eff573efb63a8089a24079f9899e2f5041f16d900b33a3963","target":"graph","created_at":"2026-05-20T00:03:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2601.21350/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"A reliable reward model is essential for aligning large language models with human preferences through reinforcement learning from human feedback. However, standard reward models are susceptible to spurious features that are not causally related to human labels. This can lead to reward hacking, where high predicted reward does not translate into better behavior. In this work, we address this problem from a causal perspective by proposing a factored representation learning framework that decomposes the model's contextual embedding into (1) causal factors that are sufficient for reward predictio","authors_text":"Biwei Huang, Fan Feng, Lei Xu, Lin Qu, Lin Yang, Shikui Tu, Wanxi Deng, Yupei Yang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:18:45Z","title":"Factored Causal Representation Learning for Robust Reward Modeling in RLHF"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.21350","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dc2d10e019616dcb155b34f46acaa96e86f92f391e99b1cf963b644a4eaaa53e","target":"record","created_at":"2026-05-20T00:03:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"46a5454ef372d5b019bab64da7b7eec03e8f32b07237e0fc131e0d82ac4acece","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:18:45Z","title_canon_sha256":"ef9548512bf43c9ae4ec0250288b5e0ce0af5b51c132b44b8796f6e5137f513e"},"schema_version":"1.0","source":{"id":"2601.21350","kind":"arxiv","version":2}},"canonical_sha256":"77622fbf9c3e0f0eec1eed77473f1becb02e07910b56f5366562868e037bb7d2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"77622fbf9c3e0f0eec1eed77473f1becb02e07910b56f5366562868e037bb7d2","first_computed_at":"2026-05-20T00:03:03.442240Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:03.442240Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LZOjIH9TVsn/oOaJPN85YfJJLwRKsZVLFMwCZ94rIvIeWI01aMABFC7oIn43JXv/NNjYMxlEQh4SvRBquATfDA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:03.443087Z","signed_message":"canonical_sha256_bytes"},"source_id":"2601.21350","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dc2d10e019616dcb155b34f46acaa96e86f92f391e99b1cf963b644a4eaaa53e","sha256:fe5700c0cf31c16eff573efb63a8089a24079f9899e2f5041f16d900b33a3963"],"state_sha256":"ea3ae2de846a6c85d4852d22e107748400236c4971b97985645381adcbc867dd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zMEfw4wQ2XvUTqTJP8QXtH4SK8aTGoSH5QVM8vcwpfqhiaMK8GwZTqCq2MDkb7UHfPAStfXWlAHMTmbbERXsDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T23:26:58.370531Z","bundle_sha256":"4dfd11c17e1f1f155d6b3f198b682bd940321f5cdef7fe4e110c7f698926480d"}}