{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:SCL7MHN4FDEYD4GK7TKF4BFKCC","short_pith_number":"pith:SCL7MHN4","canonical_record":{"source":{"id":"1906.07791","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:24:45Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"22c4f6b3cc2fc280ebae3a81277551aa51289aa43b21a4f0e948634ab379a45d","abstract_canon_sha256":"dc443269b56704a6a5c233a4d4aaeddf858946e57ddf6adc16922088b43d3a3f"},"schema_version":"1.0"},"canonical_sha256":"9097f61dbc28c981f0cafcd45e04aa10955447eff2d4bd6daca80ee3b3f5ec63","source":{"kind":"arxiv","id":"1906.07791","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.07791","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"arxiv_version","alias_value":"1906.07791v3","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.07791","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"pith_short_12","alias_value":"SCL7MHN4FDEY","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"SCL7MHN4FDEYD4GK","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"SCL7MHN4","created_at":"2026-05-18T12:33:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:SCL7MHN4FDEYD4GK7TKF4BFKCC","target":"record","payload":{"canonical_record":{"source":{"id":"1906.07791","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:24:45Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"22c4f6b3cc2fc280ebae3a81277551aa51289aa43b21a4f0e948634ab379a45d","abstract_canon_sha256":"dc443269b56704a6a5c233a4d4aaeddf858946e57ddf6adc16922088b43d3a3f"},"schema_version":"1.0"},"canonical_sha256":"9097f61dbc28c981f0cafcd45e04aa10955447eff2d4bd6daca80ee3b3f5ec63","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:30.212257Z","signature_b64":"+UII6fMMoITfO4AbIHyMMNviY4ezZ9QAtACYObsOHHFflH6mdOGsy03zvAuJZE0VuWFNDWi0R5KT+ZPI5YNlAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9097f61dbc28c981f0cafcd45e04aa10955447eff2d4bd6daca80ee3b3f5ec63","last_reissued_at":"2026-05-17T23:41:30.211448Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:30.211448Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.07791","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3wU9knCRMWowA8q/87TAAqUnNKT0AkYct7AU7UlqZYbt5j+RbIBbZKyY+bvwKrOyca+5U6iWdWNCBDzLO39SCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T18:12:42.441509Z"},"content_sha256":"e6130f790c9ab1e40fecad06c87c40e2b13967e40ee920f6a3541ed1153bf759","schema_version":"1.0","event_id":"sha256:e6130f790c9ab1e40fecad06c87c40e2b13967e40ee920f6a3541ed1153bf759"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:SCL7MHN4FDEYD4GK7TKF4BFKCC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hill Climbing on Value Estimates for Search-control in Dyna","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Amir-massoud Farahmand, Hengshuai Yao, Martha White, Yangchen Pan","submitted_at":"2019-06-18T20:24:45Z","abstract_excerpt":"Dyna is an architecture for model-based reinforcement learning (RL), where simulated experience from a model is used to update policies or value functions. A key component of Dyna is search-control, the mechanism to generate the state and action from which the agent queries the model, which remains largely unexplored. In this work, we propose to generate such states by using the trajectory obtained from Hill Climbing (HC) the current estimate of the value function. This has the effect of propagating value from high-value regions and of preemptively updating value estimates of the regions that "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.07791","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PJFZ9yHFmKK3MZ07jPm2S1KAZ7+OEtzRlS28YJ8tLi27pRslQ3THvH1BaldXH36gbY0Dl5TVB3ykO38zqkOmDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T18:12:42.442138Z"},"content_sha256":"bd5d2c5c6ddae435bbc5fe2143c8b7209a44c3265b5474ddc5126f62681e3d9a","schema_version":"1.0","event_id":"sha256:bd5d2c5c6ddae435bbc5fe2143c8b7209a44c3265b5474ddc5126f62681e3d9a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/bundle.json","state_url":"https://pith.science/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T18:12:42Z","links":{"resolver":"https://pith.science/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC","bundle":"https://pith.science/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/bundle.json","state":"https://pith.science/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SCL7MHN4FDEYD4GK7TKF4BFKCC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:SCL7MHN4FDEYD4GK7TKF4BFKCC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dc443269b56704a6a5c233a4d4aaeddf858946e57ddf6adc16922088b43d3a3f","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:24:45Z","title_canon_sha256":"22c4f6b3cc2fc280ebae3a81277551aa51289aa43b21a4f0e948634ab379a45d"},"schema_version":"1.0","source":{"id":"1906.07791","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.07791","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"arxiv_version","alias_value":"1906.07791v3","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.07791","created_at":"2026-05-17T23:41:30Z"},{"alias_kind":"pith_short_12","alias_value":"SCL7MHN4FDEY","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"SCL7MHN4FDEYD4GK","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"SCL7MHN4","created_at":"2026-05-18T12:33:27Z"}],"graph_snapshots":[{"event_id":"sha256:bd5d2c5c6ddae435bbc5fe2143c8b7209a44c3265b5474ddc5126f62681e3d9a","target":"graph","created_at":"2026-05-17T23:41:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Dyna is an architecture for model-based reinforcement learning (RL), where simulated experience from a model is used to update policies or value functions. A key component of Dyna is search-control, the mechanism to generate the state and action from which the agent queries the model, which remains largely unexplored. In this work, we propose to generate such states by using the trajectory obtained from Hill Climbing (HC) the current estimate of the value function. This has the effect of propagating value from high-value regions and of preemptively updating value estimates of the regions that ","authors_text":"Amir-massoud Farahmand, Hengshuai Yao, Martha White, Yangchen Pan","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:24:45Z","title":"Hill Climbing on Value Estimates for Search-control in Dyna"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.07791","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e6130f790c9ab1e40fecad06c87c40e2b13967e40ee920f6a3541ed1153bf759","target":"record","created_at":"2026-05-17T23:41:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dc443269b56704a6a5c233a4d4aaeddf858946e57ddf6adc16922088b43d3a3f","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-18T20:24:45Z","title_canon_sha256":"22c4f6b3cc2fc280ebae3a81277551aa51289aa43b21a4f0e948634ab379a45d"},"schema_version":"1.0","source":{"id":"1906.07791","kind":"arxiv","version":3}},"canonical_sha256":"9097f61dbc28c981f0cafcd45e04aa10955447eff2d4bd6daca80ee3b3f5ec63","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9097f61dbc28c981f0cafcd45e04aa10955447eff2d4bd6daca80ee3b3f5ec63","first_computed_at":"2026-05-17T23:41:30.211448Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:41:30.211448Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+UII6fMMoITfO4AbIHyMMNviY4ezZ9QAtACYObsOHHFflH6mdOGsy03zvAuJZE0VuWFNDWi0R5KT+ZPI5YNlAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:41:30.212257Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.07791","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e6130f790c9ab1e40fecad06c87c40e2b13967e40ee920f6a3541ed1153bf759","sha256:bd5d2c5c6ddae435bbc5fe2143c8b7209a44c3265b5474ddc5126f62681e3d9a"],"state_sha256":"8cba2d11224067a10b7a7f0fe6a654aa1bb9f0f0ae9c49bcb92fdebb5e9974a6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oTvRh0jzGx0ljwp71MpWIyDOoyduiV6R55+HhjZY8Q9r9/weeDY381dceA36w9JyaLeZ3JWRUu68JfIzYAWXAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T18:12:42.451478Z","bundle_sha256":"4ec3a518681f071a904fd214ad4da2290653debd78fc86965019d97939a9c9fe"}}