{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:UL3QVXBVRONFC5N6MXMU3KKC6C","short_pith_number":"pith:UL3QVXBV","schema_version":"1.0","canonical_sha256":"a2f70adc358b9a5175be65d94da942f0aafad00c3e943880e3a5a1b725f0f291","source":{"kind":"arxiv","id":"1803.09845","version":1},"attestation_state":"computed","paper":{"title":"Neural Baby Talk","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Devi Parikh, Dhruv Batra, Jianwei Yang, Jiasen Lu","submitted_at":"2018-03-27T01:59:56Z","abstract_excerpt":"We introduce a novel framework for image captioning that can produce natural language explicitly grounded in entities that object detectors find in the image. Our approach reconciles classical slot filling approaches (that are generally better grounded in images) with modern neural captioning approaches (that are generally more natural sounding and accurate). Our approach first generates a sentence `template' with slot locations explicitly tied to specific image regions. These slots are then filled in by visual concepts identified in the regions by object detectors. The entire architecture (se"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.09845","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-03-27T01:59:56Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b9ed7a73d3f455f646fa96aa6016bd4d1c555cb4f3ce1fdac3775d344d422d71","abstract_canon_sha256":"1ce250452dfdc528ad9d12d906e43d2a68492514cb2e4c3c3d642dd594cc41d3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:10.298739Z","signature_b64":"rZKazhYEGW2ozCB6Ol30OeEHRmMHdLkGxXJRM/nmQ72HASI6Ng1BEiCVpcZM/r4RXTioqG8YuXcRIlyNY4z0AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a2f70adc358b9a5175be65d94da942f0aafad00c3e943880e3a5a1b725f0f291","last_reissued_at":"2026-05-18T00:20:10.298108Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:10.298108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Neural Baby Talk","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Devi Parikh, Dhruv Batra, Jianwei Yang, Jiasen Lu","submitted_at":"2018-03-27T01:59:56Z","abstract_excerpt":"We introduce a novel framework for image captioning that can produce natural language explicitly grounded in entities that object detectors find in the image. Our approach reconciles classical slot filling approaches (that are generally better grounded in images) with modern neural captioning approaches (that are generally more natural sounding and accurate). Our approach first generates a sentence `template' with slot locations explicitly tied to specific image regions. These slots are then filled in by visual concepts identified in the regions by object detectors. The entire architecture (se"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09845","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.09845","created_at":"2026-05-18T00:20:10.298193+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.09845v1","created_at":"2026-05-18T00:20:10.298193+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09845","created_at":"2026-05-18T00:20:10.298193+00:00"},{"alias_kind":"pith_short_12","alias_value":"UL3QVXBVRONF","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_16","alias_value":"UL3QVXBVRONFC5N6","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_8","alias_value":"UL3QVXBV","created_at":"2026-05-18T12:32:56.356000+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C","json":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C.json","graph_json":"https://pith.science/api/pith-number/UL3QVXBVRONFC5N6MXMU3KKC6C/graph.json","events_json":"https://pith.science/api/pith-number/UL3QVXBVRONFC5N6MXMU3KKC6C/events.json","paper":"https://pith.science/paper/UL3QVXBV"},"agent_actions":{"view_html":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C","download_json":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C.json","view_paper":"https://pith.science/paper/UL3QVXBV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.09845&json=true","fetch_graph":"https://pith.science/api/pith-number/UL3QVXBVRONFC5N6MXMU3KKC6C/graph.json","fetch_events":"https://pith.science/api/pith-number/UL3QVXBVRONFC5N6MXMU3KKC6C/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C/action/storage_attestation","attest_author":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C/action/author_attestation","sign_citation":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C/action/citation_signature","submit_replication":"https://pith.science/pith/UL3QVXBVRONFC5N6MXMU3KKC6C/action/replication_record"}},"created_at":"2026-05-18T00:20:10.298193+00:00","updated_at":"2026-05-18T00:20:10.298193+00:00"}