{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:S7BIOT2BCQYQZPIACUIR6G32WP","short_pith_number":"pith:S7BIOT2B","canonical_record":{"source":{"id":"1203.0203","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-02-29T17:23:15Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"bd10ee3ca34e0fa13be5c04ee872fd9327d589b784d1b6d64619645dceda24e1","abstract_canon_sha256":"918c477a1645a059bea3a38fa46f830b7d0dfe970ef5595ac593ccd692c2857d"},"schema_version":"1.0"},"canonical_sha256":"97c2874f4114310cbd0015111f1b7ab3e743d64b83ede12f004281d59d92146a","source":{"kind":"arxiv","id":"1203.0203","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1203.0203","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"arxiv_version","alias_value":"1203.0203v1","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1203.0203","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"pith_short_12","alias_value":"S7BIOT2BCQYQ","created_at":"2026-05-18T12:27:20Z"},{"alias_kind":"pith_short_16","alias_value":"S7BIOT2BCQYQZPIA","created_at":"2026-05-18T12:27:20Z"},{"alias_kind":"pith_short_8","alias_value":"S7BIOT2B","created_at":"2026-05-18T12:27:20Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:S7BIOT2BCQYQZPIACUIR6G32WP","target":"record","payload":{"canonical_record":{"source":{"id":"1203.0203","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-02-29T17:23:15Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"bd10ee3ca34e0fa13be5c04ee872fd9327d589b784d1b6d64619645dceda24e1","abstract_canon_sha256":"918c477a1645a059bea3a38fa46f830b7d0dfe970ef5595ac593ccd692c2857d"},"schema_version":"1.0"},"canonical_sha256":"97c2874f4114310cbd0015111f1b7ab3e743d64b83ede12f004281d59d92146a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:01:02.889113Z","signature_b64":"GZOc3RRwcRVpRUfTMxikcadQTQ84AQ6KvQTxThmGN4wMSrCy1b44C/lRvgLpOnggX2HGMv25w08j/v2fYOX/BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"97c2874f4114310cbd0015111f1b7ab3e743d64b83ede12f004281d59d92146a","last_reissued_at":"2026-05-18T04:01:02.888454Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:01:02.888454Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1203.0203","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:01:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9snGp/fVRBO8emVeGvgkawKVxwTxWtW8lIcIJIQO4OSOSaUZnKExYmPZXH9vkaSMGAU6J+zxmILwEwbB/Y+gBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T18:25:46.289311Z"},"content_sha256":"d2958d3ff2e45689df32efb07e23e238c9cba9adcb4236d5a1b1fa682019f38c","schema_version":"1.0","event_id":"sha256:d2958d3ff2e45689df32efb07e23e238c9cba9adcb4236d5a1b1fa682019f38c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:S7BIOT2BCQYQZPIACUIR6G32WP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Fast Reinforcement Learning with Large Action Sets using Error-Correcting Output Codes for MDP Factorization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Gabriel Dulac-Arnold, Ludovic Denoyer, Patrick Gallinari, Philippe Preux","submitted_at":"2012-02-29T17:23:15Z","abstract_excerpt":"The use of Reinforcement Learning in real-world scenarios is strongly limited by issues of scale. Most RL learning algorithms are unable to deal with problems composed of hundreds or sometimes even dozens of possible actions, and therefore cannot be applied to many real-world problems. We consider the RL problem in the supervised classification framework where the optimal policy is obtained through a multiclass classifier, the set of classes being the set of actions of the problem. We introduce error-correcting output codes (ECOCs) in this setting and propose two new methods for reducing compl"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1203.0203","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:01:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nKWa/jbdgVPfeA8DtYbsT2oLIXLHibDBvRt3abtIzNK9QPtPYqtOwxBmbtufT6EoL45Xvs+0j0XlOyydVRhhDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T18:25:46.289990Z"},"content_sha256":"e21adfbd5ae199558ceafa790955e786359e0af035846b436a8fd75dc196a792","schema_version":"1.0","event_id":"sha256:e21adfbd5ae199558ceafa790955e786359e0af035846b436a8fd75dc196a792"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/S7BIOT2BCQYQZPIACUIR6G32WP/bundle.json","state_url":"https://pith.science/pith/S7BIOT2BCQYQZPIACUIR6G32WP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/S7BIOT2BCQYQZPIACUIR6G32WP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T18:25:46Z","links":{"resolver":"https://pith.science/pith/S7BIOT2BCQYQZPIACUIR6G32WP","bundle":"https://pith.science/pith/S7BIOT2BCQYQZPIACUIR6G32WP/bundle.json","state":"https://pith.science/pith/S7BIOT2BCQYQZPIACUIR6G32WP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/S7BIOT2BCQYQZPIACUIR6G32WP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:S7BIOT2BCQYQZPIACUIR6G32WP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"918c477a1645a059bea3a38fa46f830b7d0dfe970ef5595ac593ccd692c2857d","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-02-29T17:23:15Z","title_canon_sha256":"bd10ee3ca34e0fa13be5c04ee872fd9327d589b784d1b6d64619645dceda24e1"},"schema_version":"1.0","source":{"id":"1203.0203","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1203.0203","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"arxiv_version","alias_value":"1203.0203v1","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1203.0203","created_at":"2026-05-18T04:01:02Z"},{"alias_kind":"pith_short_12","alias_value":"S7BIOT2BCQYQ","created_at":"2026-05-18T12:27:20Z"},{"alias_kind":"pith_short_16","alias_value":"S7BIOT2BCQYQZPIA","created_at":"2026-05-18T12:27:20Z"},{"alias_kind":"pith_short_8","alias_value":"S7BIOT2B","created_at":"2026-05-18T12:27:20Z"}],"graph_snapshots":[{"event_id":"sha256:e21adfbd5ae199558ceafa790955e786359e0af035846b436a8fd75dc196a792","target":"graph","created_at":"2026-05-18T04:01:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The use of Reinforcement Learning in real-world scenarios is strongly limited by issues of scale. Most RL learning algorithms are unable to deal with problems composed of hundreds or sometimes even dozens of possible actions, and therefore cannot be applied to many real-world problems. We consider the RL problem in the supervised classification framework where the optimal policy is obtained through a multiclass classifier, the set of classes being the set of actions of the problem. We introduce error-correcting output codes (ECOCs) in this setting and propose two new methods for reducing compl","authors_text":"Gabriel Dulac-Arnold, Ludovic Denoyer, Patrick Gallinari, Philippe Preux","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-02-29T17:23:15Z","title":"Fast Reinforcement Learning with Large Action Sets using Error-Correcting Output Codes for MDP Factorization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1203.0203","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d2958d3ff2e45689df32efb07e23e238c9cba9adcb4236d5a1b1fa682019f38c","target":"record","created_at":"2026-05-18T04:01:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"918c477a1645a059bea3a38fa46f830b7d0dfe970ef5595ac593ccd692c2857d","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2012-02-29T17:23:15Z","title_canon_sha256":"bd10ee3ca34e0fa13be5c04ee872fd9327d589b784d1b6d64619645dceda24e1"},"schema_version":"1.0","source":{"id":"1203.0203","kind":"arxiv","version":1}},"canonical_sha256":"97c2874f4114310cbd0015111f1b7ab3e743d64b83ede12f004281d59d92146a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"97c2874f4114310cbd0015111f1b7ab3e743d64b83ede12f004281d59d92146a","first_computed_at":"2026-05-18T04:01:02.888454Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T04:01:02.888454Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GZOc3RRwcRVpRUfTMxikcadQTQ84AQ6KvQTxThmGN4wMSrCy1b44C/lRvgLpOnggX2HGMv25w08j/v2fYOX/BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T04:01:02.889113Z","signed_message":"canonical_sha256_bytes"},"source_id":"1203.0203","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d2958d3ff2e45689df32efb07e23e238c9cba9adcb4236d5a1b1fa682019f38c","sha256:e21adfbd5ae199558ceafa790955e786359e0af035846b436a8fd75dc196a792"],"state_sha256":"92331b77fd690f15a62e21b367527dbd412a2b495e29c6afb4e940868909f5e9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nv0f8HIhutOECp01wPDLnTqso0W6hiZc2efrf4E8ssV7MGGOWqqeW71Bq1/v12Mwjo3lnm9UzJGJfNKPoRLCBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T18:25:46.294061Z","bundle_sha256":"0876db995bc06807259f5006f103e5f275b5286e63a1552e9b483b52add76b9f"}}