{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:P5GZOULXFYCCDNST3TVFGL5QF6","short_pith_number":"pith:P5GZOULX","schema_version":"1.0","canonical_sha256":"7f4d9751772e0421b653dcea532fb02fb93f8aabdbe4001577e29c067f815213","source":{"kind":"arxiv","id":"1808.00720","version":2},"attestation_state":"computed","paper":{"title":"RecoGym: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Alexandros Karatzoglou, David Rohde, Flavian Vasile, Stephen Bonner, Travis Dunlop","submitted_at":"2018-08-02T09:13:18Z","abstract_excerpt":"Recommender Systems are becoming ubiquitous in many settings and take many forms, from product recommendation in e-commerce stores, to query suggestions in search engines, to friend recommendation in social networks. Current research directions which are largely based upon supervised learning from historical data appear to be showing diminishing returns with a lot of practitioners report a discrepancy between improvements in offline metrics for supervised learning and the online performance of the newly proposed models. One possible reason is that we are using the wrong paradigm: when looking "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.00720","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-08-02T09:13:18Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"350a3fe4c886ed32e77adb294c0637a1a8a012ce8eb2cc61130732b2a39b8043","abstract_canon_sha256":"7de5426d4a7bf4e47c84f7f8bd3132c5fda4366ecf78893d9d9abf45b1d47165"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:05:45.159727Z","signature_b64":"HU9pVimR0uiadAqhBrpt6A054/IwMZBL5xkLjGcquGs5Rz9juoSGE5Y/RE4awtqmqXC1y5Z0Jv5FeosODisjDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7f4d9751772e0421b653dcea532fb02fb93f8aabdbe4001577e29c067f815213","last_reissued_at":"2026-05-18T00:05:45.159013Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:05:45.159013Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RecoGym: A Reinforcement Learning Environment for the problem of Product Recommendation in Online Advertising","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.IR","authors_text":"Alexandros Karatzoglou, David Rohde, Flavian Vasile, Stephen Bonner, Travis Dunlop","submitted_at":"2018-08-02T09:13:18Z","abstract_excerpt":"Recommender Systems are becoming ubiquitous in many settings and take many forms, from product recommendation in e-commerce stores, to query suggestions in search engines, to friend recommendation in social networks. Current research directions which are largely based upon supervised learning from historical data appear to be showing diminishing returns with a lot of practitioners report a discrepancy between improvements in offline metrics for supervised learning and the online performance of the newly proposed models. One possible reason is that we are using the wrong paradigm: when looking "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.00720","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.00720","created_at":"2026-05-18T00:05:45.159124+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.00720v2","created_at":"2026-05-18T00:05:45.159124+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.00720","created_at":"2026-05-18T00:05:45.159124+00:00"},{"alias_kind":"pith_short_12","alias_value":"P5GZOULXFYCC","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"P5GZOULXFYCCDNST","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"P5GZOULX","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.18805","citing_title":"RecoAtlas: From Semantic Plausibility to Set-Level Utility in LLM Recommendation Agents","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2507.18756","citing_title":"Exploitation Over Exploration: Unmasking the Bias in Linear Bandit Recommender Offline Evaluation","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24977","citing_title":"A Survey on LLM-based Conversational User Simulation","ref_index":26,"is_internal_anchor":false},{"citing_arxiv_id":"2604.21750","citing_title":"Multistakeholder Impacts of Profile Portability in a Recommender Ecosystem","ref_index":57,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6","json":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6.json","graph_json":"https://pith.science/api/pith-number/P5GZOULXFYCCDNST3TVFGL5QF6/graph.json","events_json":"https://pith.science/api/pith-number/P5GZOULXFYCCDNST3TVFGL5QF6/events.json","paper":"https://pith.science/paper/P5GZOULX"},"agent_actions":{"view_html":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6","download_json":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6.json","view_paper":"https://pith.science/paper/P5GZOULX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.00720&json=true","fetch_graph":"https://pith.science/api/pith-number/P5GZOULXFYCCDNST3TVFGL5QF6/graph.json","fetch_events":"https://pith.science/api/pith-number/P5GZOULXFYCCDNST3TVFGL5QF6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6/action/storage_attestation","attest_author":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6/action/author_attestation","sign_citation":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6/action/citation_signature","submit_replication":"https://pith.science/pith/P5GZOULXFYCCDNST3TVFGL5QF6/action/replication_record"}},"created_at":"2026-05-18T00:05:45.159124+00:00","updated_at":"2026-05-18T00:05:45.159124+00:00"}