{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:P6KTRYMWJ5PDF4VEKIC7OIAYC3","short_pith_number":"pith:P6KTRYMW","schema_version":"1.0","canonical_sha256":"7f9538e1964f5e32f2a45205f7201816e2363a6a4e0f268f55e64f85da32b4e5","source":{"kind":"arxiv","id":"1206.6262","version":1},"attestation_state":"computed","paper":{"title":"Scaling Life-long Off-policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Adam White, Joseph Modayil, Richard S. Sutton","submitted_at":"2012-06-27T13:27:56Z","abstract_excerpt":"We pursue a life-long learning approach to artificial intelligence that makes extensive use of reinforcement learning algorithms. We build on our prior work with general value functions (GVFs) and the Horde architecture. GVFs have been shown able to represent a wide variety of facts about the world's dynamics that may be useful to a long-lived agent (Sutton et al. 2011). We have also previously shown scaling - that thousands of on-policy GVFs can be learned accurately in real-time on a mobile robot (Modayil, White & Sutton 2011). That work was limited in that it learned about only one policy a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1206.6262","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2012-06-27T13:27:56Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"4b61b15a49b065bbae9e944d76b00e56d5af44e579f75ac17a0d4fe499a556fd","abstract_canon_sha256":"2891a50f0a87b8e9e7194a038006ab6f201b559278bdecdccb0bfefbff1a7486"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:52:27.196336Z","signature_b64":"rohGuNonAexS9gejpkKZ8s/naxI7l66hqRiXtdcJ824cSht3Kh5vEYyIAdPRs08GeQ/F98NY0dGImgd5KT3GAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7f9538e1964f5e32f2a45205f7201816e2363a6a4e0f268f55e64f85da32b4e5","last_reissued_at":"2026-05-18T03:52:27.195769Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:52:27.195769Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scaling Life-long Off-policy Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Adam White, Joseph Modayil, Richard S. Sutton","submitted_at":"2012-06-27T13:27:56Z","abstract_excerpt":"We pursue a life-long learning approach to artificial intelligence that makes extensive use of reinforcement learning algorithms. We build on our prior work with general value functions (GVFs) and the Horde architecture. GVFs have been shown able to represent a wide variety of facts about the world's dynamics that may be useful to a long-lived agent (Sutton et al. 2011). We have also previously shown scaling - that thousands of on-policy GVFs can be learned accurately in real-time on a mobile robot (Modayil, White & Sutton 2011). That work was limited in that it learned about only one policy a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.6262","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1206.6262","created_at":"2026-05-18T03:52:27.195862+00:00"},{"alias_kind":"arxiv_version","alias_value":"1206.6262v1","created_at":"2026-05-18T03:52:27.195862+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.6262","created_at":"2026-05-18T03:52:27.195862+00:00"},{"alias_kind":"pith_short_12","alias_value":"P6KTRYMWJ5PD","created_at":"2026-05-18T12:27:18.751474+00:00"},{"alias_kind":"pith_short_16","alias_value":"P6KTRYMWJ5PDF4VE","created_at":"2026-05-18T12:27:18.751474+00:00"},{"alias_kind":"pith_short_8","alias_value":"P6KTRYMW","created_at":"2026-05-18T12:27:18.751474+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.22711","citing_title":"Abstraction for Offline Goal-Conditioned Reinforcement Learning","ref_index":22,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3","json":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3.json","graph_json":"https://pith.science/api/pith-number/P6KTRYMWJ5PDF4VEKIC7OIAYC3/graph.json","events_json":"https://pith.science/api/pith-number/P6KTRYMWJ5PDF4VEKIC7OIAYC3/events.json","paper":"https://pith.science/paper/P6KTRYMW"},"agent_actions":{"view_html":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3","download_json":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3.json","view_paper":"https://pith.science/paper/P6KTRYMW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1206.6262&json=true","fetch_graph":"https://pith.science/api/pith-number/P6KTRYMWJ5PDF4VEKIC7OIAYC3/graph.json","fetch_events":"https://pith.science/api/pith-number/P6KTRYMWJ5PDF4VEKIC7OIAYC3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3/action/storage_attestation","attest_author":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3/action/author_attestation","sign_citation":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3/action/citation_signature","submit_replication":"https://pith.science/pith/P6KTRYMWJ5PDF4VEKIC7OIAYC3/action/replication_record"}},"created_at":"2026-05-18T03:52:27.195862+00:00","updated_at":"2026-05-18T03:52:27.195862+00:00"}