{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2013:675YLK42V7XTNZ22KGCV7VQ5CI","short_pith_number":"pith:675YLK42","schema_version":"1.0","canonical_sha256":"f7fb85ab9aafef36e75a51855fd61d120ec0ce46f314fd164263d3322e89fe69","source":{"kind":"arxiv","id":"1307.0813","version":2},"attestation_state":"computed","paper":{"title":"Multi-Task Policy Search","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.RO"],"primary_cat":"stat.ML","authors_text":"Dieter Fox, Jan Peters, Marc Peter Deisenroth, Peter Englert","submitted_at":"2013-07-02T07:59:32Z","abstract_excerpt":"Learning policies that generalize across multiple tasks is an important and challenging research topic in reinforcement learning and robotics. Training individual policies for every single potential task is often impractical, especially for continuous task variations, requiring more principled approaches to share and transfer knowledge among similar tasks. We present a novel approach for learning a nonlinear feedback policy that generalizes across multiple tasks. The key idea is to define a parametrized policy as a function of both the state and the task, which allows learning a single policy "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1307.0813","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2013-07-02T07:59:32Z","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"title_canon_sha256":"dc7460c2d18d4a9e316c62794a4da23d2479b3668f1a3e14c521b146689a506c","abstract_canon_sha256":"828d7a3bc88198cc6f5dd2cc8a4afe89a91b04180d6e5bfe3a4b1137e9768d3d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:59:18.462075Z","signature_b64":"BSTzYLDiO3V6qwOQ/X61WgnUQIfAhH8+8p/Hf9+1sgACIiD5DV3IzRjsNh06loBHevCHogrLZ7X4ntzlNhRSBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f7fb85ab9aafef36e75a51855fd61d120ec0ce46f314fd164263d3322e89fe69","last_reissued_at":"2026-05-18T02:59:18.461373Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:59:18.461373Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Multi-Task Policy Search","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.RO"],"primary_cat":"stat.ML","authors_text":"Dieter Fox, Jan Peters, Marc Peter Deisenroth, Peter Englert","submitted_at":"2013-07-02T07:59:32Z","abstract_excerpt":"Learning policies that generalize across multiple tasks is an important and challenging research topic in reinforcement learning and robotics. Training individual policies for every single potential task is often impractical, especially for continuous task variations, requiring more principled approaches to share and transfer knowledge among similar tasks. We present a novel approach for learning a nonlinear feedback policy that generalizes across multiple tasks. The key idea is to define a parametrized policy as a function of both the state and the task, which allows learning a single policy "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1307.0813","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1307.0813","created_at":"2026-05-18T02:59:18.461485+00:00"},{"alias_kind":"arxiv_version","alias_value":"1307.0813v2","created_at":"2026-05-18T02:59:18.461485+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1307.0813","created_at":"2026-05-18T02:59:18.461485+00:00"},{"alias_kind":"pith_short_12","alias_value":"675YLK42V7XT","created_at":"2026-05-18T12:27:36.564083+00:00"},{"alias_kind":"pith_short_16","alias_value":"675YLK42V7XTNZ22","created_at":"2026-05-18T12:27:36.564083+00:00"},{"alias_kind":"pith_short_8","alias_value":"675YLK42","created_at":"2026-05-18T12:27:36.564083+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI","json":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI.json","graph_json":"https://pith.science/api/pith-number/675YLK42V7XTNZ22KGCV7VQ5CI/graph.json","events_json":"https://pith.science/api/pith-number/675YLK42V7XTNZ22KGCV7VQ5CI/events.json","paper":"https://pith.science/paper/675YLK42"},"agent_actions":{"view_html":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI","download_json":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI.json","view_paper":"https://pith.science/paper/675YLK42","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1307.0813&json=true","fetch_graph":"https://pith.science/api/pith-number/675YLK42V7XTNZ22KGCV7VQ5CI/graph.json","fetch_events":"https://pith.science/api/pith-number/675YLK42V7XTNZ22KGCV7VQ5CI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI/action/storage_attestation","attest_author":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI/action/author_attestation","sign_citation":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI/action/citation_signature","submit_replication":"https://pith.science/pith/675YLK42V7XTNZ22KGCV7VQ5CI/action/replication_record"}},"created_at":"2026-05-18T02:59:18.461485+00:00","updated_at":"2026-05-18T02:59:18.461485+00:00"}