{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2010:2TYLANOACBJHID534MEPNZT4DC","short_pith_number":"pith:2TYLANOA","canonical_record":{"source":{"id":"1011.1660","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-11-07T17:45:57Z","cross_cats_sorted":[],"title_canon_sha256":"5ca114c2bda46fd7ab0fb68c92aaaf51903736fa7ada93beaa7f37540e4a46e8","abstract_canon_sha256":"55387b80502ff590249ce17e7aa8f3015ae9bf49b73ef6a862f7a3017c7deb9e"},"schema_version":"1.0"},"canonical_sha256":"d4f0b035c01052740fbbe308f6e67c18b597a2795c47aff30c11c4993f6edeab","source":{"kind":"arxiv","id":"1011.1660","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1011.1660","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"arxiv_version","alias_value":"1011.1660v1","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1011.1660","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"pith_short_12","alias_value":"2TYLANOACBJH","created_at":"2026-05-18T12:26:03Z"},{"alias_kind":"pith_short_16","alias_value":"2TYLANOACBJHID53","created_at":"2026-05-18T12:26:03Z"},{"alias_kind":"pith_short_8","alias_value":"2TYLANOA","created_at":"2026-05-18T12:26:03Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2010:2TYLANOACBJHID534MEPNZT4DC","target":"record","payload":{"canonical_record":{"source":{"id":"1011.1660","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-11-07T17:45:57Z","cross_cats_sorted":[],"title_canon_sha256":"5ca114c2bda46fd7ab0fb68c92aaaf51903736fa7ada93beaa7f37540e4a46e8","abstract_canon_sha256":"55387b80502ff590249ce17e7aa8f3015ae9bf49b73ef6a862f7a3017c7deb9e"},"schema_version":"1.0"},"canonical_sha256":"d4f0b035c01052740fbbe308f6e67c18b597a2795c47aff30c11c4993f6edeab","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:36:50.773405Z","signature_b64":"6+GAn4nuBWnBhDv4Xz699QUAfKB9Pc9IdvaZZ643leB9+ZqGueQnCU3IujCati7cNu29ZtEMsdhqmRRy0x0RDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d4f0b035c01052740fbbe308f6e67c18b597a2795c47aff30c11c4993f6edeab","last_reissued_at":"2026-05-18T04:36:50.772983Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:36:50.772983Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1011.1660","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:36:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v2ePdLzYf9hxyPcXvVX8e/82OEx/uqqoyE0qeTou6rIBz3xIJ2gVnir3msebeSx5Ck6fzBKphkmrpIFTwsdFDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:58:23.026720Z"},"content_sha256":"0febea2a0f3ec591240e9dc924bc39cf43d46cdeb8d4a130573d5482877ce230","schema_version":"1.0","event_id":"sha256:0febea2a0f3ec591240e9dc924bc39cf43d46cdeb8d4a130573d5482877ce230"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2010:2TYLANOACBJHID534MEPNZT4DC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning Based on Active Learning Method","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Ali Akbar Kiaei, Hesam Sagha, Hosein Khasteh, Saeed Bagheri Shouraki","submitted_at":"2010-11-07T17:45:57Z","abstract_excerpt":"In this paper, a new reinforcement learning approach is proposed which is based on a powerful concept named Active Learning Method (ALM) in modeling. ALM expresses any multi-input-single-output system as a fuzzy combination of some single-input-singleoutput systems. The proposed method is an actor-critic system similar to Generalized Approximate Reasoning based Intelligent Control (GARIC) structure to adapt the ALM by delayed reinforcement signals. Our system uses Temporal Difference (TD) learning to model the behavior of useful actions of a control system. The goodness of an action is modeled"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1011.1660","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T04:36:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C1jhdtM1qpJJ9goD+KYJfoFTIt1FyJB9M14UkE48R2rp6IvqSUCLulvE63FWejLObQTN0OkM9Y+ef/txL9CIDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:58:23.027461Z"},"content_sha256":"cac6ce1bf15889ceda1f02af4e152c062943b6dc4ced1ed0f9dc9b385dbbe1d2","schema_version":"1.0","event_id":"sha256:cac6ce1bf15889ceda1f02af4e152c062943b6dc4ced1ed0f9dc9b385dbbe1d2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2TYLANOACBJHID534MEPNZT4DC/bundle.json","state_url":"https://pith.science/pith/2TYLANOACBJHID534MEPNZT4DC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2TYLANOACBJHID534MEPNZT4DC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:58:23Z","links":{"resolver":"https://pith.science/pith/2TYLANOACBJHID534MEPNZT4DC","bundle":"https://pith.science/pith/2TYLANOACBJHID534MEPNZT4DC/bundle.json","state":"https://pith.science/pith/2TYLANOACBJHID534MEPNZT4DC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2TYLANOACBJHID534MEPNZT4DC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2010:2TYLANOACBJHID534MEPNZT4DC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"55387b80502ff590249ce17e7aa8f3015ae9bf49b73ef6a862f7a3017c7deb9e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-11-07T17:45:57Z","title_canon_sha256":"5ca114c2bda46fd7ab0fb68c92aaaf51903736fa7ada93beaa7f37540e4a46e8"},"schema_version":"1.0","source":{"id":"1011.1660","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1011.1660","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"arxiv_version","alias_value":"1011.1660v1","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1011.1660","created_at":"2026-05-18T04:36:50Z"},{"alias_kind":"pith_short_12","alias_value":"2TYLANOACBJH","created_at":"2026-05-18T12:26:03Z"},{"alias_kind":"pith_short_16","alias_value":"2TYLANOACBJHID53","created_at":"2026-05-18T12:26:03Z"},{"alias_kind":"pith_short_8","alias_value":"2TYLANOA","created_at":"2026-05-18T12:26:03Z"}],"graph_snapshots":[{"event_id":"sha256:cac6ce1bf15889ceda1f02af4e152c062943b6dc4ced1ed0f9dc9b385dbbe1d2","target":"graph","created_at":"2026-05-18T04:36:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper, a new reinforcement learning approach is proposed which is based on a powerful concept named Active Learning Method (ALM) in modeling. ALM expresses any multi-input-single-output system as a fuzzy combination of some single-input-singleoutput systems. The proposed method is an actor-critic system similar to Generalized Approximate Reasoning based Intelligent Control (GARIC) structure to adapt the ALM by delayed reinforcement signals. Our system uses Temporal Difference (TD) learning to model the behavior of useful actions of a control system. The goodness of an action is modeled","authors_text":"Ali Akbar Kiaei, Hesam Sagha, Hosein Khasteh, Saeed Bagheri Shouraki","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-11-07T17:45:57Z","title":"Reinforcement Learning Based on Active Learning Method"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1011.1660","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0febea2a0f3ec591240e9dc924bc39cf43d46cdeb8d4a130573d5482877ce230","target":"record","created_at":"2026-05-18T04:36:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"55387b80502ff590249ce17e7aa8f3015ae9bf49b73ef6a862f7a3017c7deb9e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2010-11-07T17:45:57Z","title_canon_sha256":"5ca114c2bda46fd7ab0fb68c92aaaf51903736fa7ada93beaa7f37540e4a46e8"},"schema_version":"1.0","source":{"id":"1011.1660","kind":"arxiv","version":1}},"canonical_sha256":"d4f0b035c01052740fbbe308f6e67c18b597a2795c47aff30c11c4993f6edeab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d4f0b035c01052740fbbe308f6e67c18b597a2795c47aff30c11c4993f6edeab","first_computed_at":"2026-05-18T04:36:50.772983Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T04:36:50.772983Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6+GAn4nuBWnBhDv4Xz699QUAfKB9Pc9IdvaZZ643leB9+ZqGueQnCU3IujCati7cNu29ZtEMsdhqmRRy0x0RDA==","signature_status":"signed_v1","signed_at":"2026-05-18T04:36:50.773405Z","signed_message":"canonical_sha256_bytes"},"source_id":"1011.1660","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0febea2a0f3ec591240e9dc924bc39cf43d46cdeb8d4a130573d5482877ce230","sha256:cac6ce1bf15889ceda1f02af4e152c062943b6dc4ced1ed0f9dc9b385dbbe1d2"],"state_sha256":"03826e912a0e4e7e8fd6ffb035ea5f87446907cebd186ace9be2322e611afaf5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9bPT/3chwFTFSq0o6nvT7tIsiWpOFPy32XSiI3v+v8pU4z35pGhwE/C15Cmk5JA6MWFg/vh9NKcyWLUtPJuhDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:58:23.031506Z","bundle_sha256":"c39b7db99ba8fe895a20cc039459ccae4083795ea11d2a2c0e51f19865e1d826"}}