{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:SUG6MCJZCMOCKARURESN6THZXY","short_pith_number":"pith:SUG6MCJZ","schema_version":"1.0","canonical_sha256":"950de60939131c2502348924df4cf9be2563a3e39d6aa54753cbd6d5682de522","source":{"kind":"arxiv","id":"1902.10754","version":1},"attestation_state":"computed","paper":{"title":"Introspection Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chris R. Serrano, Michael A. Warren","submitted_at":"2019-02-27T19:53:01Z","abstract_excerpt":"Traditional reinforcement learning agents learn from experience, past or present, gained through interaction with their environment. Our approach synthesizes experience, without requiring an agent to interact with their environment, by asking the policy directly \"Are there situations X, Y, and Z, such that in these situations you would select actions A, B, and C?\" In this paper we present Introspection Learning, an algorithm that allows for the asking of these types of questions of neural network policies. Introspection Learning is reinforcement learning algorithm agnostic and the states retur"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.10754","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-27T19:53:01Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"c9a52779d7a4db35ca049fae0573be43e11070a6c4384fe88bf9d6e298ccf65f","abstract_canon_sha256":"2f1188812472570b21cef280f68e23bf7aba66bfdcab9c6f35b1bcd39393a585"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:29.794953Z","signature_b64":"UKc7sUpVAxUMdQu00r9XJP3VfhqdOBQKI8s/73gbNaO/7awxi3b9JX+IHJAgCf5Aau+zb5FFlJHIt2cjx9VXDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"950de60939131c2502348924df4cf9be2563a3e39d6aa54753cbd6d5682de522","last_reissued_at":"2026-05-17T23:52:29.794408Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:29.794408Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Introspection Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Chris R. Serrano, Michael A. Warren","submitted_at":"2019-02-27T19:53:01Z","abstract_excerpt":"Traditional reinforcement learning agents learn from experience, past or present, gained through interaction with their environment. Our approach synthesizes experience, without requiring an agent to interact with their environment, by asking the policy directly \"Are there situations X, Y, and Z, such that in these situations you would select actions A, B, and C?\" In this paper we present Introspection Learning, an algorithm that allows for the asking of these types of questions of neural network policies. Introspection Learning is reinforcement learning algorithm agnostic and the states retur"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.10754","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.10754","created_at":"2026-05-17T23:52:29.794500+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.10754v1","created_at":"2026-05-17T23:52:29.794500+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.10754","created_at":"2026-05-17T23:52:29.794500+00:00"},{"alias_kind":"pith_short_12","alias_value":"SUG6MCJZCMOC","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"SUG6MCJZCMOCKARU","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"SUG6MCJZ","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY","json":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY.json","graph_json":"https://pith.science/api/pith-number/SUG6MCJZCMOCKARURESN6THZXY/graph.json","events_json":"https://pith.science/api/pith-number/SUG6MCJZCMOCKARURESN6THZXY/events.json","paper":"https://pith.science/paper/SUG6MCJZ"},"agent_actions":{"view_html":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY","download_json":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY.json","view_paper":"https://pith.science/paper/SUG6MCJZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.10754&json=true","fetch_graph":"https://pith.science/api/pith-number/SUG6MCJZCMOCKARURESN6THZXY/graph.json","fetch_events":"https://pith.science/api/pith-number/SUG6MCJZCMOCKARURESN6THZXY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY/action/storage_attestation","attest_author":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY/action/author_attestation","sign_citation":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY/action/citation_signature","submit_replication":"https://pith.science/pith/SUG6MCJZCMOCKARURESN6THZXY/action/replication_record"}},"created_at":"2026-05-17T23:52:29.794500+00:00","updated_at":"2026-05-17T23:52:29.794500+00:00"}