{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:FJJUDIEMBAL4QJGEMAAAA4DFL3","short_pith_number":"pith:FJJUDIEM","schema_version":"1.0","canonical_sha256":"2a5341a08c0817c824c460000070655ec93a3e085636954d02d8694fed7a439e","source":{"kind":"arxiv","id":"1612.06699","version":3},"attestation_state":"computed","paper":{"title":"Unsupervised Perceptual Rewards for Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Kelvin Xu, Pierre Sermanet, Sergey Levine","submitted_at":"2016-12-20T15:04:38Z","abstract_excerpt":"Reward function design and exploration time are arguably the biggest obstacles to the deployment of reinforcement learning (RL) agents in the real world. In many real-world tasks, designing a reward function takes considerable hand engineering and often requires additional sensors to be installed just to measure whether the task has been executed successfully. Furthermore, many interesting tasks consist of multiple implicit intermediate steps that must be executed in sequence. Even when the final outcome can be measured, it does not necessarily provide feedback on these intermediate steps. To "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1612.06699","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-12-20T15:04:38Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"4d37310d69723873460a0bd2417015b0fbd9baa318d267b0763ef7312b708df7","abstract_canon_sha256":"3021278c465384e8c489e1b55cd96d19dd68a041437d16d6e008199428d436ba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:42:30.142988Z","signature_b64":"0uyfHxpw8d0q5vq+hM82jI1wVUP9xuNFaeVAYwgJjU7AxFTkKZYIM0o1diTCtKd4AP8GkSgxPjkQa1qQNPRYCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2a5341a08c0817c824c460000070655ec93a3e085636954d02d8694fed7a439e","last_reissued_at":"2026-05-18T00:42:30.142290Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:42:30.142290Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Unsupervised Perceptual Rewards for Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CV","authors_text":"Kelvin Xu, Pierre Sermanet, Sergey Levine","submitted_at":"2016-12-20T15:04:38Z","abstract_excerpt":"Reward function design and exploration time are arguably the biggest obstacles to the deployment of reinforcement learning (RL) agents in the real world. In many real-world tasks, designing a reward function takes considerable hand engineering and often requires additional sensors to be installed just to measure whether the task has been executed successfully. Furthermore, many interesting tasks consist of multiple implicit intermediate steps that must be executed in sequence. Even when the final outcome can be measured, it does not necessarily provide feedback on these intermediate steps. To "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.06699","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1612.06699","created_at":"2026-05-18T00:42:30.142419+00:00"},{"alias_kind":"arxiv_version","alias_value":"1612.06699v3","created_at":"2026-05-18T00:42:30.142419+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.06699","created_at":"2026-05-18T00:42:30.142419+00:00"},{"alias_kind":"pith_short_12","alias_value":"FJJUDIEMBAL4","created_at":"2026-05-18T12:30:15.759754+00:00"},{"alias_kind":"pith_short_16","alias_value":"FJJUDIEMBAL4QJGE","created_at":"2026-05-18T12:30:15.759754+00:00"},{"alias_kind":"pith_short_8","alias_value":"FJJUDIEM","created_at":"2026-05-18T12:30:15.759754+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2210.00030","citing_title":"VIP: Towards Universal Visual Reward and Representation via Value-Implicit Pre-Training","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2310.08864","citing_title":"Open X-Embodiment: Robotic Learning Datasets and RT-X Models","ref_index":44,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3","json":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3.json","graph_json":"https://pith.science/api/pith-number/FJJUDIEMBAL4QJGEMAAAA4DFL3/graph.json","events_json":"https://pith.science/api/pith-number/FJJUDIEMBAL4QJGEMAAAA4DFL3/events.json","paper":"https://pith.science/paper/FJJUDIEM"},"agent_actions":{"view_html":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3","download_json":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3.json","view_paper":"https://pith.science/paper/FJJUDIEM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1612.06699&json=true","fetch_graph":"https://pith.science/api/pith-number/FJJUDIEMBAL4QJGEMAAAA4DFL3/graph.json","fetch_events":"https://pith.science/api/pith-number/FJJUDIEMBAL4QJGEMAAAA4DFL3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3/action/storage_attestation","attest_author":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3/action/author_attestation","sign_citation":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3/action/citation_signature","submit_replication":"https://pith.science/pith/FJJUDIEMBAL4QJGEMAAAA4DFL3/action/replication_record"}},"created_at":"2026-05-18T00:42:30.142419+00:00","updated_at":"2026-05-18T00:42:30.142419+00:00"}