{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:5GHKYBHCZ4DWSHCOQKZRGCS7EN","short_pith_number":"pith:5GHKYBHC","canonical_record":{"source":{"id":"1907.06584","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-12T10:13:05Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d78edb66363f9850f8bc713e6373a5d6b8d5ce28465c0c91cc53fbc2ea130f8b","abstract_canon_sha256":"fda59b58631d281798cbd0efa084216cc41e2cdb1f0d49a21bfe98b86fccead6"},"schema_version":"1.0"},"canonical_sha256":"e98eac04e2cf07691c4e82b3130a5f234a224489bf7ab7109cf7bc7492904f92","source":{"kind":"arxiv","id":"1907.06584","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.06584","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"arxiv_version","alias_value":"1907.06584v1","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.06584","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"pith_short_12","alias_value":"5GHKYBHCZ4DW","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"5GHKYBHCZ4DWSHCO","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"5GHKYBHC","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:5GHKYBHCZ4DWSHCOQKZRGCS7EN","target":"record","payload":{"canonical_record":{"source":{"id":"1907.06584","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-12T10:13:05Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"d78edb66363f9850f8bc713e6373a5d6b8d5ce28465c0c91cc53fbc2ea130f8b","abstract_canon_sha256":"fda59b58631d281798cbd0efa084216cc41e2cdb1f0d49a21bfe98b86fccead6"},"schema_version":"1.0"},"canonical_sha256":"e98eac04e2cf07691c4e82b3130a5f234a224489bf7ab7109cf7bc7492904f92","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:40:37.344730Z","signature_b64":"9Lf52Gn9l5QVDUIAUUgA5uPQFKSeABKhho6thUamFq6oSsE1+8u39qOAq59yg9xAkrddAnaDxZrlfrUe0EZvCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e98eac04e2cf07691c4e82b3130a5f234a224489bf7ab7109cf7bc7492904f92","last_reissued_at":"2026-05-17T23:40:37.344083Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:40:37.344083Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.06584","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RsFfvlqs7XtysfM1Q6ekOXMu0zsFBkS4CoINuzm+PJF50R8RTg9Ud0oR4HKm4qB1TAxG1VOFHGUCAiBiCI1nAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T02:28:47.968795Z"},"content_sha256":"5659a5190e66512199930f466bd9a1c754ba7d9571a324e90c7b4ee394820d81","schema_version":"1.0","event_id":"sha256:5659a5190e66512199930f466bd9a1c754ba7d9571a324e90c7b4ee394820d81"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:5GHKYBHCZ4DWSHCOQKZRGCS7EN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Environment Reconstruction with Hidden Confounders for Reinforcement Learning based Recommendation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jieping Ye, Qingyang Li, Wenjie Shang, Yang Yu, Yiping Meng, Zhiwei Qin","submitted_at":"2019-07-12T10:13:05Z","abstract_excerpt":"Reinforcement learning aims at searching the best policy model for decision making, and has been shown powerful for sequential recommendations. The training of the policy by reinforcement learning, however, is placed in an environment. In many real-world applications, however, the policy training in the real environment can cause an unbearable cost, due to the exploration in the environment. Environment reconstruction from the past data is thus an appealing way to release the power of reinforcement learning in these applications. The reconstruction of the environment is, basically, to extract "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.06584","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:40:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A7Z98HqkItJZdUowOTSp77C47xZMLWCcW/kElPPER1gLNKQW6QNiKvS6Q7uXHfnEpv3kRnpVLPdJHsqz4gwzDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T02:28:47.969176Z"},"content_sha256":"f4a9951415f6e074b3a819e5b8f9608a8f3449eaf70e21a13db6b5cf6fdcfefc","schema_version":"1.0","event_id":"sha256:f4a9951415f6e074b3a819e5b8f9608a8f3449eaf70e21a13db6b5cf6fdcfefc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/bundle.json","state_url":"https://pith.science/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T02:28:47Z","links":{"resolver":"https://pith.science/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN","bundle":"https://pith.science/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/bundle.json","state":"https://pith.science/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5GHKYBHCZ4DWSHCOQKZRGCS7EN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:5GHKYBHCZ4DWSHCOQKZRGCS7EN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fda59b58631d281798cbd0efa084216cc41e2cdb1f0d49a21bfe98b86fccead6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-12T10:13:05Z","title_canon_sha256":"d78edb66363f9850f8bc713e6373a5d6b8d5ce28465c0c91cc53fbc2ea130f8b"},"schema_version":"1.0","source":{"id":"1907.06584","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.06584","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"arxiv_version","alias_value":"1907.06584v1","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.06584","created_at":"2026-05-17T23:40:37Z"},{"alias_kind":"pith_short_12","alias_value":"5GHKYBHCZ4DW","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"5GHKYBHCZ4DWSHCO","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"5GHKYBHC","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:f4a9951415f6e074b3a819e5b8f9608a8f3449eaf70e21a13db6b5cf6fdcfefc","target":"graph","created_at":"2026-05-17T23:40:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning aims at searching the best policy model for decision making, and has been shown powerful for sequential recommendations. The training of the policy by reinforcement learning, however, is placed in an environment. In many real-world applications, however, the policy training in the real environment can cause an unbearable cost, due to the exploration in the environment. Environment reconstruction from the past data is thus an appealing way to release the power of reinforcement learning in these applications. The reconstruction of the environment is, basically, to extract ","authors_text":"Jieping Ye, Qingyang Li, Wenjie Shang, Yang Yu, Yiping Meng, Zhiwei Qin","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-12T10:13:05Z","title":"Environment Reconstruction with Hidden Confounders for Reinforcement Learning based Recommendation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.06584","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5659a5190e66512199930f466bd9a1c754ba7d9571a324e90c7b4ee394820d81","target":"record","created_at":"2026-05-17T23:40:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fda59b58631d281798cbd0efa084216cc41e2cdb1f0d49a21bfe98b86fccead6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-07-12T10:13:05Z","title_canon_sha256":"d78edb66363f9850f8bc713e6373a5d6b8d5ce28465c0c91cc53fbc2ea130f8b"},"schema_version":"1.0","source":{"id":"1907.06584","kind":"arxiv","version":1}},"canonical_sha256":"e98eac04e2cf07691c4e82b3130a5f234a224489bf7ab7109cf7bc7492904f92","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e98eac04e2cf07691c4e82b3130a5f234a224489bf7ab7109cf7bc7492904f92","first_computed_at":"2026-05-17T23:40:37.344083Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:40:37.344083Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9Lf52Gn9l5QVDUIAUUgA5uPQFKSeABKhho6thUamFq6oSsE1+8u39qOAq59yg9xAkrddAnaDxZrlfrUe0EZvCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:40:37.344730Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.06584","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5659a5190e66512199930f466bd9a1c754ba7d9571a324e90c7b4ee394820d81","sha256:f4a9951415f6e074b3a819e5b8f9608a8f3449eaf70e21a13db6b5cf6fdcfefc"],"state_sha256":"ab0de36b5459276318d7d853b9f367788a111c1fc35a2daa36a3f5fcf278c0e8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VqCRtWQRVm9tb/ShE68MP9qPgEBjMftN3OxRnfTFm8E/b+eOq7JYpP6ez9Rw0O6G0xkJVKgNH9N1CarNlR5UDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T02:28:47.971222Z","bundle_sha256":"92b8a1c8a4d8787cb6540917d6319663f91a28ca780828f6df353d86b0c491ee"}}