{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:ECJQWTB3QNVN2PAUJGGWFMCN57","short_pith_number":"pith:ECJQWTB3","canonical_record":{"source":{"id":"1603.00448","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T20:35:56Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"d525ed1dc2876547f17fdda1f9efb67b36c37680d312aaeb427dbc0c36240e22","abstract_canon_sha256":"48fddc5845838a6e2fcbea35f45c6f329a244ec82912105f305c34ba40608ad0"},"schema_version":"1.0"},"canonical_sha256":"20930b4c3b836add3c14498d62b04defcd7d32bc46587b57f1ffd578e2bef9ca","source":{"kind":"arxiv","id":"1603.00448","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00448","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00448v3","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00448","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"pith_short_12","alias_value":"ECJQWTB3QNVN","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_16","alias_value":"ECJQWTB3QNVN2PAU","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_8","alias_value":"ECJQWTB3","created_at":"2026-05-18T12:30:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:ECJQWTB3QNVN2PAUJGGWFMCN57","target":"record","payload":{"canonical_record":{"source":{"id":"1603.00448","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T20:35:56Z","cross_cats_sorted":["cs.AI","cs.RO"],"title_canon_sha256":"d525ed1dc2876547f17fdda1f9efb67b36c37680d312aaeb427dbc0c36240e22","abstract_canon_sha256":"48fddc5845838a6e2fcbea35f45c6f329a244ec82912105f305c34ba40608ad0"},"schema_version":"1.0"},"canonical_sha256":"20930b4c3b836add3c14498d62b04defcd7d32bc46587b57f1ffd578e2bef9ca","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:13:30.429003Z","signature_b64":"dHlah+cFqSbOtslMHaZhEmIPLrJb/5X9+nLHvq1nxJ2x8NphjJcgSs85U+wye34fJtGJMB7dnbxLRdsdTxYBBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"20930b4c3b836add3c14498d62b04defcd7d32bc46587b57f1ffd578e2bef9ca","last_reissued_at":"2026-05-18T01:13:30.428171Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:13:30.428171Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.00448","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4RPirDuB1zRnLQdbq/WqiFHnJLqQbtwuB+iHQ2H3avKKNjyVgKz/605fKKe+soRLmPQnB0lB1u5EonTFtlXpDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T14:32:15.471173Z"},"content_sha256":"cef5653c66bede6fd73f0344e96d6562881a3279cdbdf7cfa0bf9bf83cd0c28e","schema_version":"1.0","event_id":"sha256:cef5653c66bede6fd73f0344e96d6562881a3279cdbdf7cfa0bf9bf83cd0c28e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:ECJQWTB3QNVN2PAUJGGWFMCN57","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Guided Cost Learning: Deep Inverse Optimal Control via Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.RO"],"primary_cat":"cs.LG","authors_text":"Chelsea Finn, Pieter Abbeel, Sergey Levine","submitted_at":"2016-03-01T20:35:56Z","abstract_excerpt":"Reinforcement learning can acquire complex behaviors from high-level specifications. However, defining a cost function that can be optimized effectively and encodes the correct task is challenging in practice. We explore how inverse optimal control (IOC) can be used to learn behaviors from demonstrations, with applications to torque control of high-dimensional robotic systems. Our method addresses two key challenges in inverse optimal control: first, the need for informative features and effective regularization to impose structure on the cost, and second, the difficulty of learning the cost f"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00448","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CXcpbAfAMOwiOVdzyU84gMeqO2ZwHHY89Fx+TpnBIB48mM/p8VtAMkg0xYsbc/8e1O6HCmvgOCMVc9HC89mBCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T14:32:15.471547Z"},"content_sha256":"e2a85d04e6fbcb9c258fa4c751d4bab5c90c49e5edacc1ba77221adf4273b5a2","schema_version":"1.0","event_id":"sha256:e2a85d04e6fbcb9c258fa4c751d4bab5c90c49e5edacc1ba77221adf4273b5a2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/bundle.json","state_url":"https://pith.science/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T14:32:15Z","links":{"resolver":"https://pith.science/pith/ECJQWTB3QNVN2PAUJGGWFMCN57","bundle":"https://pith.science/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/bundle.json","state":"https://pith.science/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ECJQWTB3QNVN2PAUJGGWFMCN57/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:ECJQWTB3QNVN2PAUJGGWFMCN57","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"48fddc5845838a6e2fcbea35f45c6f329a244ec82912105f305c34ba40608ad0","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T20:35:56Z","title_canon_sha256":"d525ed1dc2876547f17fdda1f9efb67b36c37680d312aaeb427dbc0c36240e22"},"schema_version":"1.0","source":{"id":"1603.00448","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.00448","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"arxiv_version","alias_value":"1603.00448v3","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.00448","created_at":"2026-05-18T01:13:30Z"},{"alias_kind":"pith_short_12","alias_value":"ECJQWTB3QNVN","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_16","alias_value":"ECJQWTB3QNVN2PAU","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_8","alias_value":"ECJQWTB3","created_at":"2026-05-18T12:30:12Z"}],"graph_snapshots":[{"event_id":"sha256:e2a85d04e6fbcb9c258fa4c751d4bab5c90c49e5edacc1ba77221adf4273b5a2","target":"graph","created_at":"2026-05-18T01:13:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning can acquire complex behaviors from high-level specifications. However, defining a cost function that can be optimized effectively and encodes the correct task is challenging in practice. We explore how inverse optimal control (IOC) can be used to learn behaviors from demonstrations, with applications to torque control of high-dimensional robotic systems. Our method addresses two key challenges in inverse optimal control: first, the need for informative features and effective regularization to impose structure on the cost, and second, the difficulty of learning the cost f","authors_text":"Chelsea Finn, Pieter Abbeel, Sergey Levine","cross_cats":["cs.AI","cs.RO"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T20:35:56Z","title":"Guided Cost Learning: Deep Inverse Optimal Control via Policy Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.00448","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cef5653c66bede6fd73f0344e96d6562881a3279cdbdf7cfa0bf9bf83cd0c28e","target":"record","created_at":"2026-05-18T01:13:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"48fddc5845838a6e2fcbea35f45c6f329a244ec82912105f305c34ba40608ad0","cross_cats_sorted":["cs.AI","cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-03-01T20:35:56Z","title_canon_sha256":"d525ed1dc2876547f17fdda1f9efb67b36c37680d312aaeb427dbc0c36240e22"},"schema_version":"1.0","source":{"id":"1603.00448","kind":"arxiv","version":3}},"canonical_sha256":"20930b4c3b836add3c14498d62b04defcd7d32bc46587b57f1ffd578e2bef9ca","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"20930b4c3b836add3c14498d62b04defcd7d32bc46587b57f1ffd578e2bef9ca","first_computed_at":"2026-05-18T01:13:30.428171Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:13:30.428171Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dHlah+cFqSbOtslMHaZhEmIPLrJb/5X9+nLHvq1nxJ2x8NphjJcgSs85U+wye34fJtGJMB7dnbxLRdsdTxYBBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:13:30.429003Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.00448","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cef5653c66bede6fd73f0344e96d6562881a3279cdbdf7cfa0bf9bf83cd0c28e","sha256:e2a85d04e6fbcb9c258fa4c751d4bab5c90c49e5edacc1ba77221adf4273b5a2"],"state_sha256":"fae3494810ad05459fb78c20466bf1a6c010cd138d40d2c4cd607f5326b9ac2d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5ywcc5v0Ja2HGBT6lCqv1bFEVDiZ+GbglkgGcZ+K1MeIqoZ8ublmqXL75IyDO6azolQALe4clG7xC4MwwpyfDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T14:32:15.473536Z","bundle_sha256":"97df0faa8d8f732cd3249a41b740e42432e4ec541e9fd8f786d6b940afe8f1cc"}}