{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:7P7YFZ3FPLSZAT3BTSFHT5WQLH","short_pith_number":"pith:7P7YFZ3F","schema_version":"1.0","canonical_sha256":"fbff82e7657ae5904f619c8a79f6d059d6e32a6108251f1f5c7bf3d0b576ec79","source":{"kind":"arxiv","id":"1711.06782","version":1},"attestation_state":"computed","paper":{"title":"Leave no Trace: Learning to Reset for Safe and Autonomous Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.LG","authors_text":"Benjamin Eysenbach, Julian Ibarz, Sergey Levine, Shixiang Gu","submitted_at":"2017-11-18T00:53:20Z","abstract_excerpt":"Deep reinforcement learning algorithms can learn complex behavioral skills, but real-world application of these methods requires a large amount of experience to be collected by the agent. In practical settings, such as robotics, this involves repeatedly attempting a task, resetting the environment between each attempt. However, not all tasks are easily or automatically reversible. In practice, this learning process requires extensive human intervention. In this work, we propose an autonomous method for safe and efficient reinforcement learning that simultaneously learns a forward and reset pol"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.06782","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-11-18T00:53:20Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"919a6a40ed1e1cc755163ee54034f691a138b7e73369e41682a318826b3f6b68","abstract_canon_sha256":"b30da16143d9d6eca20b06ea1f7e1e252f8f018e4eff8972b61d3d6a2cff309b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:30:15.622636Z","signature_b64":"LXf95uTrJbMev+ZIAzNEo3BRlRHqE6jHOjS01KwYZVHwNxCeUCIAPSCxlXlC/7kBFMlBy0n5gIiSn+O98vizBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fbff82e7657ae5904f619c8a79f6d059d6e32a6108251f1f5c7bf3d0b576ec79","last_reissued_at":"2026-05-18T00:30:15.621999Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:30:15.621999Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Leave no Trace: Learning to Reset for Safe and Autonomous Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.LG","authors_text":"Benjamin Eysenbach, Julian Ibarz, Sergey Levine, Shixiang Gu","submitted_at":"2017-11-18T00:53:20Z","abstract_excerpt":"Deep reinforcement learning algorithms can learn complex behavioral skills, but real-world application of these methods requires a large amount of experience to be collected by the agent. In practical settings, such as robotics, this involves repeatedly attempting a task, resetting the environment between each attempt. However, not all tasks are easily or automatically reversible. In practice, this learning process requires extensive human intervention. In this work, we propose an autonomous method for safe and efficient reinforcement learning that simultaneously learns a forward and reset pol"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.06782","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.06782","created_at":"2026-05-18T00:30:15.622110+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.06782v1","created_at":"2026-05-18T00:30:15.622110+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.06782","created_at":"2026-05-18T00:30:15.622110+00:00"},{"alias_kind":"pith_short_12","alias_value":"7P7YFZ3FPLSZ","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_16","alias_value":"7P7YFZ3FPLSZAT3B","created_at":"2026-05-18T12:31:05.417338+00:00"},{"alias_kind":"pith_short_8","alias_value":"7P7YFZ3F","created_at":"2026-05-18T12:31:05.417338+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.01285","citing_title":"Learning the Arrow of Time","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2005.01643","citing_title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems","ref_index":296,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH","json":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH.json","graph_json":"https://pith.science/api/pith-number/7P7YFZ3FPLSZAT3BTSFHT5WQLH/graph.json","events_json":"https://pith.science/api/pith-number/7P7YFZ3FPLSZAT3BTSFHT5WQLH/events.json","paper":"https://pith.science/paper/7P7YFZ3F"},"agent_actions":{"view_html":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH","download_json":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH.json","view_paper":"https://pith.science/paper/7P7YFZ3F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.06782&json=true","fetch_graph":"https://pith.science/api/pith-number/7P7YFZ3FPLSZAT3BTSFHT5WQLH/graph.json","fetch_events":"https://pith.science/api/pith-number/7P7YFZ3FPLSZAT3BTSFHT5WQLH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH/action/storage_attestation","attest_author":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH/action/author_attestation","sign_citation":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH/action/citation_signature","submit_replication":"https://pith.science/pith/7P7YFZ3FPLSZAT3BTSFHT5WQLH/action/replication_record"}},"created_at":"2026-05-18T00:30:15.622110+00:00","updated_at":"2026-05-18T00:30:15.622110+00:00"}