{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:OP66Z23J3UXXMUIPJBQFUUJL7H","short_pith_number":"pith:OP66Z23J","schema_version":"1.0","canonical_sha256":"73fdeceb69dd2f76510f48605a512bf9c60b7e86b85675fa01ed8320f482b2df","source":{"kind":"arxiv","id":"1704.03899","version":1},"attestation_state":"computed","paper":{"title":"Deep Reinforcement Learning-based Image Captioning with Embedding Reward","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Li-Jia Li, Ning Zhang, Xiaoyu Wang, Xutao Lv, Zhou Ren","submitted_at":"2017-04-12T18:55:03Z","abstract_excerpt":"Image captioning is a challenging problem owing to the complexity in understanding the image content and diverse ways of describing it in natural language. Recent advances in deep neural networks have substantially improved the performance of this task. Most state-of-the-art approaches follow an encoder-decoder framework, which generates captions using a sequential recurrent prediction model. However, in this paper, we introduce a novel decision-making framework for image captioning. We utilize a \"policy network\" and a \"value network\" to collaboratively generate captions. The policy network se"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1704.03899","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-12T18:55:03Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"6e6f26213b826779470ef9fda975baec40c374bbabc314549c9e6c1b70f2336b","abstract_canon_sha256":"010a027185dd9cba512d546557996fd925fca70311dd22cf22c2380b36efad98"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:46:25.896343Z","signature_b64":"a1UM2N6HIrzHlNKlvHqgK8QheIeVyenU36cO85rNRU8cNd1lKdLqzCqTrcBH3H6FP6fuJFI3b/vv+x+8wEHECw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"73fdeceb69dd2f76510f48605a512bf9c60b7e86b85675fa01ed8320f482b2df","last_reissued_at":"2026-05-18T00:46:25.895769Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:46:25.895769Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep Reinforcement Learning-based Image Captioning with Embedding Reward","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Li-Jia Li, Ning Zhang, Xiaoyu Wang, Xutao Lv, Zhou Ren","submitted_at":"2017-04-12T18:55:03Z","abstract_excerpt":"Image captioning is a challenging problem owing to the complexity in understanding the image content and diverse ways of describing it in natural language. Recent advances in deep neural networks have substantially improved the performance of this task. Most state-of-the-art approaches follow an encoder-decoder framework, which generates captions using a sequential recurrent prediction model. However, in this paper, we introduce a novel decision-making framework for image captioning. We utilize a \"policy network\" and a \"value network\" to collaboratively generate captions. The policy network se"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.03899","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1704.03899","created_at":"2026-05-18T00:46:25.895875+00:00"},{"alias_kind":"arxiv_version","alias_value":"1704.03899v1","created_at":"2026-05-18T00:46:25.895875+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.03899","created_at":"2026-05-18T00:46:25.895875+00:00"},{"alias_kind":"pith_short_12","alias_value":"OP66Z23J3UXX","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_16","alias_value":"OP66Z23J3UXXMUIP","created_at":"2026-05-18T12:31:34.259226+00:00"},{"alias_kind":"pith_short_8","alias_value":"OP66Z23J","created_at":"2026-05-18T12:31:34.259226+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1906.12188","citing_title":"A Deep Decoder Structure Based on WordEmbedding Regression for An Encoder-Decoder Based Model for Image Captioning","ref_index":39,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H","json":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H.json","graph_json":"https://pith.science/api/pith-number/OP66Z23J3UXXMUIPJBQFUUJL7H/graph.json","events_json":"https://pith.science/api/pith-number/OP66Z23J3UXXMUIPJBQFUUJL7H/events.json","paper":"https://pith.science/paper/OP66Z23J"},"agent_actions":{"view_html":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H","download_json":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H.json","view_paper":"https://pith.science/paper/OP66Z23J","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1704.03899&json=true","fetch_graph":"https://pith.science/api/pith-number/OP66Z23J3UXXMUIPJBQFUUJL7H/graph.json","fetch_events":"https://pith.science/api/pith-number/OP66Z23J3UXXMUIPJBQFUUJL7H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H/action/storage_attestation","attest_author":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H/action/author_attestation","sign_citation":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H/action/citation_signature","submit_replication":"https://pith.science/pith/OP66Z23J3UXXMUIPJBQFUUJL7H/action/replication_record"}},"created_at":"2026-05-18T00:46:25.895875+00:00","updated_at":"2026-05-18T00:46:25.895875+00:00"}