{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:7F7Y7U2SXTSHGXBUFLCBLZQC4S","short_pith_number":"pith:7F7Y7U2S","canonical_record":{"source":{"id":"1811.05869","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T15:53:25Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"bf6a22aea2d47090f4dfc113c7219fd982487fbd2985b1e2291cfb5850937e3a","abstract_canon_sha256":"aa10389b936d1d73d7c579cf04254d14ad6f4734dadd187618f03e9acdea197d"},"schema_version":"1.0"},"canonical_sha256":"f97f8fd352bce4735c342ac415e602e4ad983eb4e8983eac811fc35d98c72bed","source":{"kind":"arxiv","id":"1811.05869","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.05869","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"arxiv_version","alias_value":"1811.05869v1","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.05869","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"pith_short_12","alias_value":"7F7Y7U2SXTSH","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"7F7Y7U2SXTSHGXBU","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"7F7Y7U2S","created_at":"2026-05-18T12:32:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:7F7Y7U2SXTSHGXBUFLCBLZQC4S","target":"record","payload":{"canonical_record":{"source":{"id":"1811.05869","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T15:53:25Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"bf6a22aea2d47090f4dfc113c7219fd982487fbd2985b1e2291cfb5850937e3a","abstract_canon_sha256":"aa10389b936d1d73d7c579cf04254d14ad6f4734dadd187618f03e9acdea197d"},"schema_version":"1.0"},"canonical_sha256":"f97f8fd352bce4735c342ac415e602e4ad983eb4e8983eac811fc35d98c72bed","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:41.930149Z","signature_b64":"I5LT3AhK68L6x9hAtIyefXBsIRPvCiEjva43jYM9kRB1dVeR7GBdvnGgYcGR1BwLKvcRxVcsAP3VfcOYG3mhAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f97f8fd352bce4735c342ac415e602e4ad983eb4e8983eac811fc35d98c72bed","last_reissued_at":"2026-05-18T00:00:41.929563Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:41.929563Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.05869","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yRSO6iIP7G5mHFwZqoY7A1pKUzd4TcMcZjaj2Z8hQTLThi2id0xD1pJXDruZUav8oszBSbTkqFj6SZv/npaQBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:17:41.857972Z"},"content_sha256":"3022d637ae189dace25761dfa2440ca3df6a9149e54e45a7ed259bb2edf4b3c3","schema_version":"1.0","event_id":"sha256:3022d637ae189dace25761dfa2440ca3df6a9149e54e45a7ed259bb2edf4b3c3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:7F7Y7U2SXTSHGXBUFLCBLZQC4S","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Large-scale Interactive Recommendation with Tree-structured Policy Gradient","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Han Cai, Haokun Chen, Ruiming Tang, Weinan Zhang, Xinyi Dai, Xuejian Wang, Yong Yu, Yuzhou Zhang","submitted_at":"2018-11-14T15:53:25Z","abstract_excerpt":"Reinforcement learning (RL) has recently been introduced to interactive recommender systems (IRS) because of its nature of learning from dynamic interactions and planning for long-run performance. As IRS is always with thousands of items to recommend (i.e., thousands of actions), most existing RL-based methods, however, fail to handle such a large discrete action space problem and thus become inefficient. The existing work that tries to deal with the large discrete action space problem by utilizing the deep deterministic policy gradient framework suffers from the inconsistency between the cont"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.05869","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ujXHBzPxiZsB5Xp/fQ5TRTs26gyn2M9dDiIYALufG1yMZwahqyrvmu0Ha3O4IPmEKflvDdjNqWQpjcuYKpLhBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T13:17:41.858662Z"},"content_sha256":"29f25c775c0d74e404547b7db7ad12f5b3b5f069bfc0f7663060a09737f8a7de","schema_version":"1.0","event_id":"sha256:29f25c775c0d74e404547b7db7ad12f5b3b5f069bfc0f7663060a09737f8a7de"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/bundle.json","state_url":"https://pith.science/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T13:17:41Z","links":{"resolver":"https://pith.science/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S","bundle":"https://pith.science/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/bundle.json","state":"https://pith.science/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7F7Y7U2SXTSHGXBUFLCBLZQC4S/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:7F7Y7U2SXTSHGXBUFLCBLZQC4S","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"aa10389b936d1d73d7c579cf04254d14ad6f4734dadd187618f03e9acdea197d","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T15:53:25Z","title_canon_sha256":"bf6a22aea2d47090f4dfc113c7219fd982487fbd2985b1e2291cfb5850937e3a"},"schema_version":"1.0","source":{"id":"1811.05869","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.05869","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"arxiv_version","alias_value":"1811.05869v1","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.05869","created_at":"2026-05-18T00:00:41Z"},{"alias_kind":"pith_short_12","alias_value":"7F7Y7U2SXTSH","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"7F7Y7U2SXTSHGXBU","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"7F7Y7U2S","created_at":"2026-05-18T12:32:11Z"}],"graph_snapshots":[{"event_id":"sha256:29f25c775c0d74e404547b7db7ad12f5b3b5f069bfc0f7663060a09737f8a7de","target":"graph","created_at":"2026-05-18T00:00:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has recently been introduced to interactive recommender systems (IRS) because of its nature of learning from dynamic interactions and planning for long-run performance. As IRS is always with thousands of items to recommend (i.e., thousands of actions), most existing RL-based methods, however, fail to handle such a large discrete action space problem and thus become inefficient. The existing work that tries to deal with the large discrete action space problem by utilizing the deep deterministic policy gradient framework suffers from the inconsistency between the cont","authors_text":"Han Cai, Haokun Chen, Ruiming Tang, Weinan Zhang, Xinyi Dai, Xuejian Wang, Yong Yu, Yuzhou Zhang","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T15:53:25Z","title":"Large-scale Interactive Recommendation with Tree-structured Policy Gradient"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.05869","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3022d637ae189dace25761dfa2440ca3df6a9149e54e45a7ed259bb2edf4b3c3","target":"record","created_at":"2026-05-18T00:00:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"aa10389b936d1d73d7c579cf04254d14ad6f4734dadd187618f03e9acdea197d","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-14T15:53:25Z","title_canon_sha256":"bf6a22aea2d47090f4dfc113c7219fd982487fbd2985b1e2291cfb5850937e3a"},"schema_version":"1.0","source":{"id":"1811.05869","kind":"arxiv","version":1}},"canonical_sha256":"f97f8fd352bce4735c342ac415e602e4ad983eb4e8983eac811fc35d98c72bed","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f97f8fd352bce4735c342ac415e602e4ad983eb4e8983eac811fc35d98c72bed","first_computed_at":"2026-05-18T00:00:41.929563Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:00:41.929563Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"I5LT3AhK68L6x9hAtIyefXBsIRPvCiEjva43jYM9kRB1dVeR7GBdvnGgYcGR1BwLKvcRxVcsAP3VfcOYG3mhAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:00:41.930149Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.05869","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3022d637ae189dace25761dfa2440ca3df6a9149e54e45a7ed259bb2edf4b3c3","sha256:29f25c775c0d74e404547b7db7ad12f5b3b5f069bfc0f7663060a09737f8a7de"],"state_sha256":"4201d0e0c2ab53702e359fd7f8e8ff9aa6e9d742c577b468983a9f9435edf48f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CNAuxq37NxEapdYzVgzXhpwse9BDhtNNX2nDZkZ1PpPCei8NOXmddeftM5M/TJAEgLQhYjzkJcA9hfMMTLFZDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T13:17:41.862200Z","bundle_sha256":"25110060d6630d4e5ed7bead15f04dc322a4fc97a7d5cdc6cce90326a390e2aa"}}