{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:V44U676KN7TWWNNH25WAHVBZHY","short_pith_number":"pith:V44U676K","schema_version":"1.0","canonical_sha256":"af394f7fca6fe76b35a7d76c03d4393e1ad59afefc7d262629fca81e63196513","source":{"kind":"arxiv","id":"1711.09048","version":3},"attestation_state":"computed","paper":{"title":"A Compression-Inspired Framework for Macro Discovery","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO","cs.SY"],"primary_cat":"cs.AI","authors_text":"Bruno C. da Silva, Francisco M. Garcia, Philip S. Thomas","submitted_at":"2017-11-24T16:58:45Z","abstract_excerpt":"In this paper we consider the problem of how a reinforcement learning agent tasked with solving a set of related Markov decision processes can use knowledge acquired early in its lifetime to improve its ability to more rapidly solve novel, but related, tasks. One way of exploiting this experience is by identifying recurrent patterns in trajectories obtained from well-performing policies. We propose a three-step framework in which an agent 1) generates a set of candidate open-loop macros by compressing trajectories drawn from near-optimal policies; 2) evaluates the value of each macro; and 3) s"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.09048","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-11-24T16:58:45Z","cross_cats_sorted":["cs.RO","cs.SY"],"title_canon_sha256":"ec2a35efa7e7944c679874d9fe00d93e41600efa043b078976ee2c8a5c1bf777","abstract_canon_sha256":"9cc1353a9bff203f3635cd793f143121a3cf587f06a9a62a6298205ab8b7428b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:55.920292Z","signature_b64":"wmVCJaHVvWRlWH3Irm1NdNti4eNLLGEpXOE73Cjf1AZl02czmPplBL/SHmPAc11p4MsE0q6vB8g0F3UE1g5yAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"af394f7fca6fe76b35a7d76c03d4393e1ad59afefc7d262629fca81e63196513","last_reissued_at":"2026-05-17T23:52:55.919588Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:55.919588Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Compression-Inspired Framework for Macro Discovery","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO","cs.SY"],"primary_cat":"cs.AI","authors_text":"Bruno C. da Silva, Francisco M. Garcia, Philip S. Thomas","submitted_at":"2017-11-24T16:58:45Z","abstract_excerpt":"In this paper we consider the problem of how a reinforcement learning agent tasked with solving a set of related Markov decision processes can use knowledge acquired early in its lifetime to improve its ability to more rapidly solve novel, but related, tasks. One way of exploiting this experience is by identifying recurrent patterns in trajectories obtained from well-performing policies. We propose a three-step framework in which an agent 1) generates a set of candidate open-loop macros by compressing trajectories drawn from near-optimal policies; 2) evaluates the value of each macro; and 3) s"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.09048","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.09048","created_at":"2026-05-17T23:52:55.919690+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.09048v3","created_at":"2026-05-17T23:52:55.919690+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.09048","created_at":"2026-05-17T23:52:55.919690+00:00"},{"alias_kind":"pith_short_12","alias_value":"V44U676KN7TW","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_16","alias_value":"V44U676KN7TWWNNH","created_at":"2026-05-18T12:31:49.984773+00:00"},{"alias_kind":"pith_short_8","alias_value":"V44U676K","created_at":"2026-05-18T12:31:49.984773+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY","json":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY.json","graph_json":"https://pith.science/api/pith-number/V44U676KN7TWWNNH25WAHVBZHY/graph.json","events_json":"https://pith.science/api/pith-number/V44U676KN7TWWNNH25WAHVBZHY/events.json","paper":"https://pith.science/paper/V44U676K"},"agent_actions":{"view_html":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY","download_json":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY.json","view_paper":"https://pith.science/paper/V44U676K","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.09048&json=true","fetch_graph":"https://pith.science/api/pith-number/V44U676KN7TWWNNH25WAHVBZHY/graph.json","fetch_events":"https://pith.science/api/pith-number/V44U676KN7TWWNNH25WAHVBZHY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY/action/storage_attestation","attest_author":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY/action/author_attestation","sign_citation":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY/action/citation_signature","submit_replication":"https://pith.science/pith/V44U676KN7TWWNNH25WAHVBZHY/action/replication_record"}},"created_at":"2026-05-17T23:52:55.919690+00:00","updated_at":"2026-05-17T23:52:55.919690+00:00"}