{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:CVFWSXASCVF7YZTIDJVO73SSIF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"40f72ffe719f547d168d535af6f8f3da67feca35e8c3c453a75a156b87221c3a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T03:57:45Z","title_canon_sha256":"8f62d2b90899437f8b2a3247af2b4d9feec724321666567a1f3ba432bb066a72"},"schema_version":"1.0","source":{"id":"1803.00710","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00710","created_at":"2026-05-18T00:15:09Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00710v3","created_at":"2026-05-18T00:15:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00710","created_at":"2026-05-18T00:15:09Z"},{"alias_kind":"pith_short_12","alias_value":"CVFWSXASCVF7","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_16","alias_value":"CVFWSXASCVF7YZTI","created_at":"2026-05-18T12:32:19Z"},{"alias_kind":"pith_short_8","alias_value":"CVFWSXAS","created_at":"2026-05-18T12:32:19Z"}],"graph_snapshots":[{"event_id":"sha256:38cc6c91053975d6aa07b300dd2dc30b77c5197b9107210e7ddd53da69e28e3e","target":"graph","created_at":"2026-05-18T00:15:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In e-commerce platforms such as Amazon and TaoBao, ranking items in a search session is a typical multi-step decision-making problem. Learning to rank (LTR) methods have been widely applied to ranking problems. However, such methods often consider different ranking steps in a session to be independent, which conversely may be highly correlated to each other. For better utilizing the correlation between different ranking steps, in this paper, we propose to use reinforcement learning (RL) to learn an optimal ranking policy which maximizes the expected accumulative rewards in a search session. Fi","authors_text":"Anxiang Zeng, Qing Da, Yang Yu, Yinghui Xu, Yujing Hu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T03:57:45Z","title":"Reinforcement Learning to Rank in E-Commerce Search Engine: Formalization, Analysis, and Application"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00710","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a54c026e9320257961da17262fd33ce5a6634842200445d031db8db96a0549cf","target":"record","created_at":"2026-05-18T00:15:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"40f72ffe719f547d168d535af6f8f3da67feca35e8c3c453a75a156b87221c3a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T03:57:45Z","title_canon_sha256":"8f62d2b90899437f8b2a3247af2b4d9feec724321666567a1f3ba432bb066a72"},"schema_version":"1.0","source":{"id":"1803.00710","kind":"arxiv","version":3}},"canonical_sha256":"154b695c12154bfc66681a6aefee524163fba85172179a016106fcab803fa4bf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"154b695c12154bfc66681a6aefee524163fba85172179a016106fcab803fa4bf","first_computed_at":"2026-05-18T00:15:09.661169Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:15:09.661169Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4o8y+rnMPKPQGsxZR5B10F3BroLSOwbYXFshKszxzqkuls7YScDp5ChpklzzNODIGPEgOa3F+ttupD730OaBBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:15:09.661809Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.00710","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a54c026e9320257961da17262fd33ce5a6634842200445d031db8db96a0549cf","sha256:38cc6c91053975d6aa07b300dd2dc30b77c5197b9107210e7ddd53da69e28e3e"],"state_sha256":"8251a637cb75d41ea577363f28fe6c5187b5ef5c4776f625eaab25408b2b1e85"}