{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:H542XFWWA45W5IO2OIANNAC724","short_pith_number":"pith:H542XFWW","canonical_record":{"source":{"id":"1806.06187","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T05:17:32Z","cross_cats_sorted":[],"title_canon_sha256":"fa93797d5907bea90499d6ad21a43b716c6d817f24eca36d7053a71318889c7e","abstract_canon_sha256":"f75886ea13cfb232e159214fb3e60f03570f1965a484b81476173fcb5d6a99f2"},"schema_version":"1.0"},"canonical_sha256":"3f79ab96d6073b6ea1da7200d6805fd73919151f00179f95d2596e763d34c4ab","source":{"kind":"arxiv","id":"1806.06187","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.06187","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.06187v2","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.06187","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"pith_short_12","alias_value":"H542XFWWA45W","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"H542XFWWA45W5IO2","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"H542XFWW","created_at":"2026-05-18T12:32:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:H542XFWWA45W5IO2OIANNAC724","target":"record","payload":{"canonical_record":{"source":{"id":"1806.06187","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T05:17:32Z","cross_cats_sorted":[],"title_canon_sha256":"fa93797d5907bea90499d6ad21a43b716c6d817f24eca36d7053a71318889c7e","abstract_canon_sha256":"f75886ea13cfb232e159214fb3e60f03570f1965a484b81476173fcb5d6a99f2"},"schema_version":"1.0"},"canonical_sha256":"3f79ab96d6073b6ea1da7200d6805fd73919151f00179f95d2596e763d34c4ab","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:11:17.120088Z","signature_b64":"6oTVVO5o9Se+l4Xj3dmG7ifmT85oIH2j/PSKBbK6l/tv03l7qo3grJyX5I7QXnUuGvOjHNL3cQ5D0acS9NLtCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3f79ab96d6073b6ea1da7200d6805fd73919151f00179f95d2596e763d34c4ab","last_reissued_at":"2026-05-18T00:11:17.119492Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:11:17.119492Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.06187","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cBij6VoeP75wkTDjJR8sMjuJsDrB/DQBjHnqUX8aAy8k90M+LnWs/q0BFfeiLMmagemzeIhwQ+DZn6h68dYnDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T13:25:37.962907Z"},"content_sha256":"64bd0d9ba51ead48404f4a1933530f69c93cfe47e14377f6047b2e5f8ad7a36f","schema_version":"1.0","event_id":"sha256:64bd0d9ba51ead48404f4a1933530f69c93cfe47e14377f6047b2e5f8ad7a36f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:H542XFWWA45W5IO2OIANNAC724","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Scheduled Policy Optimization for Natural Language Communication with Intelligent Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bowen Zhou, Mo Yu, Shiyu Chang, Wenhan Xiong, William Yang Wang, Xiaoxiao Guo","submitted_at":"2018-06-16T05:17:32Z","abstract_excerpt":"We investigate the task of learning to follow natural language instructions by jointly reasoning with visual observations and language inputs. In contrast to existing methods which start with learning from demonstrations (LfD) and then use reinforcement learning (RL) to fine-tune the model parameters, we propose a novel policy optimization algorithm which dynamically schedules demonstration learning and RL. The proposed training paradigm provides efficient exploration and better generalization beyond existing methods. Comparing to existing ensemble models, the best single model based on our pr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.06187","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:11:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o1ACy5ZfmItXTXv4mXo9xiIgFj1DZIu2XKbc0C1hn40SF821cCM3JjIlsB22+NF3TZsKhHpgajj2dmYqPBS1CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T13:25:37.963575Z"},"content_sha256":"74ddeb0daaa7ec65933f453a328f47dc963a162e9f3023307567b12447cc021e","schema_version":"1.0","event_id":"sha256:74ddeb0daaa7ec65933f453a328f47dc963a162e9f3023307567b12447cc021e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/H542XFWWA45W5IO2OIANNAC724/bundle.json","state_url":"https://pith.science/pith/H542XFWWA45W5IO2OIANNAC724/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/H542XFWWA45W5IO2OIANNAC724/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T13:25:37Z","links":{"resolver":"https://pith.science/pith/H542XFWWA45W5IO2OIANNAC724","bundle":"https://pith.science/pith/H542XFWWA45W5IO2OIANNAC724/bundle.json","state":"https://pith.science/pith/H542XFWWA45W5IO2OIANNAC724/state.json","well_known_bundle":"https://pith.science/.well-known/pith/H542XFWWA45W5IO2OIANNAC724/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:H542XFWWA45W5IO2OIANNAC724","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f75886ea13cfb232e159214fb3e60f03570f1965a484b81476173fcb5d6a99f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T05:17:32Z","title_canon_sha256":"fa93797d5907bea90499d6ad21a43b716c6d817f24eca36d7053a71318889c7e"},"schema_version":"1.0","source":{"id":"1806.06187","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.06187","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"arxiv_version","alias_value":"1806.06187v2","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.06187","created_at":"2026-05-18T00:11:17Z"},{"alias_kind":"pith_short_12","alias_value":"H542XFWWA45W","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"H542XFWWA45W5IO2","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"H542XFWW","created_at":"2026-05-18T12:32:28Z"}],"graph_snapshots":[{"event_id":"sha256:74ddeb0daaa7ec65933f453a328f47dc963a162e9f3023307567b12447cc021e","target":"graph","created_at":"2026-05-18T00:11:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We investigate the task of learning to follow natural language instructions by jointly reasoning with visual observations and language inputs. In contrast to existing methods which start with learning from demonstrations (LfD) and then use reinforcement learning (RL) to fine-tune the model parameters, we propose a novel policy optimization algorithm which dynamically schedules demonstration learning and RL. The proposed training paradigm provides efficient exploration and better generalization beyond existing methods. Comparing to existing ensemble models, the best single model based on our pr","authors_text":"Bowen Zhou, Mo Yu, Shiyu Chang, Wenhan Xiong, William Yang Wang, Xiaoxiao Guo","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T05:17:32Z","title":"Scheduled Policy Optimization for Natural Language Communication with Intelligent Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.06187","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:64bd0d9ba51ead48404f4a1933530f69c93cfe47e14377f6047b2e5f8ad7a36f","target":"record","created_at":"2026-05-18T00:11:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f75886ea13cfb232e159214fb3e60f03570f1965a484b81476173fcb5d6a99f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-06-16T05:17:32Z","title_canon_sha256":"fa93797d5907bea90499d6ad21a43b716c6d817f24eca36d7053a71318889c7e"},"schema_version":"1.0","source":{"id":"1806.06187","kind":"arxiv","version":2}},"canonical_sha256":"3f79ab96d6073b6ea1da7200d6805fd73919151f00179f95d2596e763d34c4ab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3f79ab96d6073b6ea1da7200d6805fd73919151f00179f95d2596e763d34c4ab","first_computed_at":"2026-05-18T00:11:17.119492Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:11:17.119492Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6oTVVO5o9Se+l4Xj3dmG7ifmT85oIH2j/PSKBbK6l/tv03l7qo3grJyX5I7QXnUuGvOjHNL3cQ5D0acS9NLtCw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:11:17.120088Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.06187","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:64bd0d9ba51ead48404f4a1933530f69c93cfe47e14377f6047b2e5f8ad7a36f","sha256:74ddeb0daaa7ec65933f453a328f47dc963a162e9f3023307567b12447cc021e"],"state_sha256":"5f3b92f86f3f27d90ae56411043ba187cebd512e23e2c05299457e3e59e7eb34"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3cKrP+L9N8E3Dm8xabi+BdCno+lBGjuv7L38206K8xXtirwSHE/em0q7iJ+SVaU6LUPwlsYJSKJDrPq8JnmWCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T13:25:37.966650Z","bundle_sha256":"47514d9d114f7009f9aa9f24208ac89e09aa31e97bb65b8554048bc16dc5b02a"}}