{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:FFLLPJ7YUOL4II65BLCUBLC4WM","short_pith_number":"pith:FFLLPJ7Y","schema_version":"1.0","canonical_sha256":"2956b7a7f8a397c423dd0ac540ac5cb3169e8db25e7c5723a8e45d7e822ac241","source":{"kind":"arxiv","id":"1804.08619","version":1},"attestation_state":"computed","paper":{"title":"State Distribution-aware Sampling for Deep Q-learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Wu, Fuxian Huang, Gang Pan, Weichao Li, Xi Li","submitted_at":"2018-04-23T13:22:22Z","abstract_excerpt":"A critical and challenging problem in reinforcement learning is how to learn the state-action value function from the experience replay buffer and simultaneously keep sample efficiency and faster convergence to a high quality solution. In prior works, transitions are uniformly sampled at random from the replay buffer or sampled based on their priority measured by temporal-difference (TD) error. However, these approaches do not fully take into consideration the intrinsic characteristics of transition distribution in the state space and could result in redundant and unnecessary TD updates, slowi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.08619","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-04-23T13:22:22Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"195e5567d95a871d0aab758ddfe30769e80283787ff4aa6faeb78e1759bcaeeb","abstract_canon_sha256":"9091ff6383cb914ac9c672aaa9be2e919177da84a46e68e78ce75680dcae7dd4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:47.197154Z","signature_b64":"+kOu/LIkdLGwMifAH3L8q/A9RKkz9jzBY0SjSSIB2j33nGUkttYKcw3XRn7u9cE+lhLzblTkAyqWcHYdK4SJAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2956b7a7f8a397c423dd0ac540ac5cb3169e8db25e7c5723a8e45d7e822ac241","last_reissued_at":"2026-05-18T00:17:47.196357Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:47.196357Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"State Distribution-aware Sampling for Deep Q-learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Fei Wu, Fuxian Huang, Gang Pan, Weichao Li, Xi Li","submitted_at":"2018-04-23T13:22:22Z","abstract_excerpt":"A critical and challenging problem in reinforcement learning is how to learn the state-action value function from the experience replay buffer and simultaneously keep sample efficiency and faster convergence to a high quality solution. In prior works, transitions are uniformly sampled at random from the replay buffer or sampled based on their priority measured by temporal-difference (TD) error. However, these approaches do not fully take into consideration the intrinsic characteristics of transition distribution in the state space and could result in redundant and unnecessary TD updates, slowi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.08619","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.08619","created_at":"2026-05-18T00:17:47.196505+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.08619v1","created_at":"2026-05-18T00:17:47.196505+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.08619","created_at":"2026-05-18T00:17:47.196505+00:00"},{"alias_kind":"pith_short_12","alias_value":"FFLLPJ7YUOL4","created_at":"2026-05-18T12:32:22.470017+00:00"},{"alias_kind":"pith_short_16","alias_value":"FFLLPJ7YUOL4II65","created_at":"2026-05-18T12:32:22.470017+00:00"},{"alias_kind":"pith_short_8","alias_value":"FFLLPJ7Y","created_at":"2026-05-18T12:32:22.470017+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM","json":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM.json","graph_json":"https://pith.science/api/pith-number/FFLLPJ7YUOL4II65BLCUBLC4WM/graph.json","events_json":"https://pith.science/api/pith-number/FFLLPJ7YUOL4II65BLCUBLC4WM/events.json","paper":"https://pith.science/paper/FFLLPJ7Y"},"agent_actions":{"view_html":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM","download_json":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM.json","view_paper":"https://pith.science/paper/FFLLPJ7Y","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.08619&json=true","fetch_graph":"https://pith.science/api/pith-number/FFLLPJ7YUOL4II65BLCUBLC4WM/graph.json","fetch_events":"https://pith.science/api/pith-number/FFLLPJ7YUOL4II65BLCUBLC4WM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM/action/storage_attestation","attest_author":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM/action/author_attestation","sign_citation":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM/action/citation_signature","submit_replication":"https://pith.science/pith/FFLLPJ7YUOL4II65BLCUBLC4WM/action/replication_record"}},"created_at":"2026-05-18T00:17:47.196505+00:00","updated_at":"2026-05-18T00:17:47.196505+00:00"}