{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:UDI6FWYKRMA5QW6G5EBQPM46NC","short_pith_number":"pith:UDI6FWYK","canonical_record":{"source":{"id":"1709.09611","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-27T16:37:51Z","cross_cats_sorted":[],"title_canon_sha256":"33126e51262d094aa7260acea538f8491ca9b992033e680b624269dd2313e38a","abstract_canon_sha256":"498af0cc6e9ac6e26bb3b9ab51b65f286fac5e87b3377a552ba6c247bf7e4ca5"},"schema_version":"1.0"},"canonical_sha256":"a0d1e2db0a8b01d85bc6e90307b39e68ad7d46df6605dff17ed264a7c4b6cede","source":{"kind":"arxiv","id":"1709.09611","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.09611","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"arxiv_version","alias_value":"1709.09611v1","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.09611","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"pith_short_12","alias_value":"UDI6FWYKRMA5","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_16","alias_value":"UDI6FWYKRMA5QW6G","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_8","alias_value":"UDI6FWYK","created_at":"2026-05-18T12:31:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:UDI6FWYKRMA5QW6G5EBQPM46NC","target":"record","payload":{"canonical_record":{"source":{"id":"1709.09611","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-27T16:37:51Z","cross_cats_sorted":[],"title_canon_sha256":"33126e51262d094aa7260acea538f8491ca9b992033e680b624269dd2313e38a","abstract_canon_sha256":"498af0cc6e9ac6e26bb3b9ab51b65f286fac5e87b3377a552ba6c247bf7e4ca5"},"schema_version":"1.0"},"canonical_sha256":"a0d1e2db0a8b01d85bc6e90307b39e68ad7d46df6605dff17ed264a7c4b6cede","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:34:11.175258Z","signature_b64":"f2BM3nUL6zq/yuKGQjmzitKRbF1UKoVW0b2+64rXUPTcx9owhAsyuZOUfW6Ny7F9MmSeF7RXpxINL1CFUCVcAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a0d1e2db0a8b01d85bc6e90307b39e68ad7d46df6605dff17ed264a7c4b6cede","last_reissued_at":"2026-05-18T00:34:11.174609Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:34:11.174609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.09611","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IRp2uVscMneK1fyMGyOaQsKWxgO/OImzoREpfe3Ec0YH8du7kVTntH8hruwD3Zm9s24/kYORYY3sYjD4BzeWBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T22:38:26.498934Z"},"content_sha256":"69241bb3059ebb203117a12685ac3dbb3faca6fb41de4282e400442490ef077b","schema_version":"1.0","event_id":"sha256:69241bb3059ebb203117a12685ac3dbb3faca6fb41de4282e400442490ef077b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:UDI6FWYKRMA5QW6G5EBQPM46NC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Policy Search Method For Temporal Logic Specified Reinforcement Learning Tasks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Calin Belta, Xiao Li, Yao Ma","submitted_at":"2017-09-27T16:37:51Z","abstract_excerpt":"Reward engineering is an important aspect of reinforcement learning. Whether or not the user's intentions can be correctly encapsulated in the reward function can significantly impact the learning outcome. Current methods rely on manually crafted reward functions that often require parameter tuning to obtain the desired behavior. This operation can be expensive when exploration requires systems to interact with the physical world. In this paper, we explore the use of temporal logic (TL) to specify tasks in reinforcement learning. TL formula can be translated to a real-valued function that meas"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.09611","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:34:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KdsB+ggTwwyWsZNEqqjwyo2XNHy88Dm8id0ZxQbLBI2q37tDPNNbhgfI4QORmn1Pcf0tWVJbjFRZG/JmzciZBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T22:38:26.499487Z"},"content_sha256":"425cd2ee9c5c2185f38c514710d6f3b4b4cbad64a2a63df2ad24c6e1c3cd911d","schema_version":"1.0","event_id":"sha256:425cd2ee9c5c2185f38c514710d6f3b4b4cbad64a2a63df2ad24c6e1c3cd911d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/bundle.json","state_url":"https://pith.science/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T22:38:26Z","links":{"resolver":"https://pith.science/pith/UDI6FWYKRMA5QW6G5EBQPM46NC","bundle":"https://pith.science/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/bundle.json","state":"https://pith.science/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UDI6FWYKRMA5QW6G5EBQPM46NC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:UDI6FWYKRMA5QW6G5EBQPM46NC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"498af0cc6e9ac6e26bb3b9ab51b65f286fac5e87b3377a552ba6c247bf7e4ca5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-27T16:37:51Z","title_canon_sha256":"33126e51262d094aa7260acea538f8491ca9b992033e680b624269dd2313e38a"},"schema_version":"1.0","source":{"id":"1709.09611","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.09611","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"arxiv_version","alias_value":"1709.09611v1","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.09611","created_at":"2026-05-18T00:34:11Z"},{"alias_kind":"pith_short_12","alias_value":"UDI6FWYKRMA5","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_16","alias_value":"UDI6FWYKRMA5QW6G","created_at":"2026-05-18T12:31:46Z"},{"alias_kind":"pith_short_8","alias_value":"UDI6FWYK","created_at":"2026-05-18T12:31:46Z"}],"graph_snapshots":[{"event_id":"sha256:425cd2ee9c5c2185f38c514710d6f3b4b4cbad64a2a63df2ad24c6e1c3cd911d","target":"graph","created_at":"2026-05-18T00:34:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reward engineering is an important aspect of reinforcement learning. Whether or not the user's intentions can be correctly encapsulated in the reward function can significantly impact the learning outcome. Current methods rely on manually crafted reward functions that often require parameter tuning to obtain the desired behavior. This operation can be expensive when exploration requires systems to interact with the physical world. In this paper, we explore the use of temporal logic (TL) to specify tasks in reinforcement learning. TL formula can be translated to a real-valued function that meas","authors_text":"Calin Belta, Xiao Li, Yao Ma","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-27T16:37:51Z","title":"A Policy Search Method For Temporal Logic Specified Reinforcement Learning Tasks"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.09611","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:69241bb3059ebb203117a12685ac3dbb3faca6fb41de4282e400442490ef077b","target":"record","created_at":"2026-05-18T00:34:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"498af0cc6e9ac6e26bb3b9ab51b65f286fac5e87b3377a552ba6c247bf7e4ca5","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-09-27T16:37:51Z","title_canon_sha256":"33126e51262d094aa7260acea538f8491ca9b992033e680b624269dd2313e38a"},"schema_version":"1.0","source":{"id":"1709.09611","kind":"arxiv","version":1}},"canonical_sha256":"a0d1e2db0a8b01d85bc6e90307b39e68ad7d46df6605dff17ed264a7c4b6cede","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a0d1e2db0a8b01d85bc6e90307b39e68ad7d46df6605dff17ed264a7c4b6cede","first_computed_at":"2026-05-18T00:34:11.174609Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:34:11.174609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"f2BM3nUL6zq/yuKGQjmzitKRbF1UKoVW0b2+64rXUPTcx9owhAsyuZOUfW6Ny7F9MmSeF7RXpxINL1CFUCVcAQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:34:11.175258Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.09611","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:69241bb3059ebb203117a12685ac3dbb3faca6fb41de4282e400442490ef077b","sha256:425cd2ee9c5c2185f38c514710d6f3b4b4cbad64a2a63df2ad24c6e1c3cd911d"],"state_sha256":"bf1643b30a260b86033f9cd8c06b566b9fb8e98f7eae0a4228a1a1511cdea03b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dh3Sm+FpGBCpcA7GMaAsJgAQVUFmL9l5MYQOs8aLlVnuWjROspWiFCaN6hv+msW+hrmco8v/sU0/99pAUV6eDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T22:38:26.502025Z","bundle_sha256":"8860690659ba361a33a7a343bf4265e9ea1b5347ae0be7d129f48afc8bcbd655"}}