{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2014:SWF4RTD7KQ3GP6JCW6YG46R47R","short_pith_number":"pith:SWF4RTD7","canonical_record":{"source":{"id":"1411.4952","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2014-11-18T18:23:45Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"44682ae89d86c06746c7267dc5543adfb145ccf3f078c2c0715bad739ba71de1","abstract_canon_sha256":"dcf259deec8e0f4ba608fdc9f6eaf019c71bad87d8bf94b130306dd00a0cc2c2"},"schema_version":"1.0"},"canonical_sha256":"958bc8cc7f543667f922b7b06e7a3cfc66e541798bf002cd7f78ecd2db3b7a61","source":{"kind":"arxiv","id":"1411.4952","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1411.4952","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"arxiv_version","alias_value":"1411.4952v3","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1411.4952","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"pith_short_12","alias_value":"SWF4RTD7KQ3G","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_16","alias_value":"SWF4RTD7KQ3GP6JC","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_8","alias_value":"SWF4RTD7","created_at":"2026-05-18T12:28:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2014:SWF4RTD7KQ3GP6JCW6YG46R47R","target":"record","payload":{"canonical_record":{"source":{"id":"1411.4952","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2014-11-18T18:23:45Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"44682ae89d86c06746c7267dc5543adfb145ccf3f078c2c0715bad739ba71de1","abstract_canon_sha256":"dcf259deec8e0f4ba608fdc9f6eaf019c71bad87d8bf94b130306dd00a0cc2c2"},"schema_version":"1.0"},"canonical_sha256":"958bc8cc7f543667f922b7b06e7a3cfc66e541798bf002cd7f78ecd2db3b7a61","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:20:21.033535Z","signature_b64":"a0uo79/rAgo2ah6FNwFaolxWCBuYC+QmkXej0iqi2uZge+wKkHAxVgThplGxOrMKsgrK3WPV9IYqlfB1yeMuCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"958bc8cc7f543667f922b7b06e7a3cfc66e541798bf002cd7f78ecd2db3b7a61","last_reissued_at":"2026-05-18T01:20:21.032797Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:20:21.032797Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1411.4952","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:20:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"irff4kaBK37d7/657zwPBaxyWR3biFP+i1aLwqgC1psf2pcn0rEAzlqoVqmp7Orci7EAvgDHS6GmTU8XNdWQBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T19:28:00.714923Z"},"content_sha256":"0dd599c01afa5734687a116b9538014398bf3299365ace3ecdabb1205fb01b12","schema_version":"1.0","event_id":"sha256:0dd599c01afa5734687a116b9538014398bf3299365ace3ecdabb1205fb01b12"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2014:SWF4RTD7KQ3GP6JCW6YG46R47R","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"From Captions to Visual Concepts and Back","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"C. Lawrence Zitnick, Forrest Iandola, Geoffrey Zweig, Hao Fang, Jianfeng Gao, John C. Platt, Li Deng, Margaret Mitchell, Piotr Doll\\'ar, Rupesh Srivastava, Saurabh Gupta, Xiaodong He","submitted_at":"2014-11-18T18:23:45Z","abstract_excerpt":"This paper presents a novel approach for automatically generating image descriptions: visual detectors, language models, and multimodal similarity models learnt directly from a dataset of image captions. We use multiple instance learning to train visual detectors for words that commonly occur in captions, including many different parts of speech such as nouns, verbs, and adjectives. The word detector outputs serve as conditional inputs to a maximum-entropy language model. The language model learns from a set of over 400,000 image descriptions to capture the statistics of word usage. We capture"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1411.4952","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:20:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"56oVlLVigjckdZILI+3Nl+pFZdiOsXk9YMoz0kUl1bOOYIUGbVaQMNwmSE+iyd8Tm2mOdxPe86Yxq/62Q+JABA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T19:28:00.715292Z"},"content_sha256":"1a891a7295eee95da8459263b7d687ccb3775933b0a19b290fb6d8059ece7a75","schema_version":"1.0","event_id":"sha256:1a891a7295eee95da8459263b7d687ccb3775933b0a19b290fb6d8059ece7a75"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/bundle.json","state_url":"https://pith.science/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T19:28:00Z","links":{"resolver":"https://pith.science/pith/SWF4RTD7KQ3GP6JCW6YG46R47R","bundle":"https://pith.science/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/bundle.json","state":"https://pith.science/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SWF4RTD7KQ3GP6JCW6YG46R47R/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2014:SWF4RTD7KQ3GP6JCW6YG46R47R","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dcf259deec8e0f4ba608fdc9f6eaf019c71bad87d8bf94b130306dd00a0cc2c2","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2014-11-18T18:23:45Z","title_canon_sha256":"44682ae89d86c06746c7267dc5543adfb145ccf3f078c2c0715bad739ba71de1"},"schema_version":"1.0","source":{"id":"1411.4952","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1411.4952","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"arxiv_version","alias_value":"1411.4952v3","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1411.4952","created_at":"2026-05-18T01:20:21Z"},{"alias_kind":"pith_short_12","alias_value":"SWF4RTD7KQ3G","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_16","alias_value":"SWF4RTD7KQ3GP6JC","created_at":"2026-05-18T12:28:49Z"},{"alias_kind":"pith_short_8","alias_value":"SWF4RTD7","created_at":"2026-05-18T12:28:49Z"}],"graph_snapshots":[{"event_id":"sha256:1a891a7295eee95da8459263b7d687ccb3775933b0a19b290fb6d8059ece7a75","target":"graph","created_at":"2026-05-18T01:20:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper presents a novel approach for automatically generating image descriptions: visual detectors, language models, and multimodal similarity models learnt directly from a dataset of image captions. We use multiple instance learning to train visual detectors for words that commonly occur in captions, including many different parts of speech such as nouns, verbs, and adjectives. The word detector outputs serve as conditional inputs to a maximum-entropy language model. The language model learns from a set of over 400,000 image descriptions to capture the statistics of word usage. We capture","authors_text":"C. Lawrence Zitnick, Forrest Iandola, Geoffrey Zweig, Hao Fang, Jianfeng Gao, John C. Platt, Li Deng, Margaret Mitchell, Piotr Doll\\'ar, Rupesh Srivastava, Saurabh Gupta, Xiaodong He","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2014-11-18T18:23:45Z","title":"From Captions to Visual Concepts and Back"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1411.4952","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0dd599c01afa5734687a116b9538014398bf3299365ace3ecdabb1205fb01b12","target":"record","created_at":"2026-05-18T01:20:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dcf259deec8e0f4ba608fdc9f6eaf019c71bad87d8bf94b130306dd00a0cc2c2","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2014-11-18T18:23:45Z","title_canon_sha256":"44682ae89d86c06746c7267dc5543adfb145ccf3f078c2c0715bad739ba71de1"},"schema_version":"1.0","source":{"id":"1411.4952","kind":"arxiv","version":3}},"canonical_sha256":"958bc8cc7f543667f922b7b06e7a3cfc66e541798bf002cd7f78ecd2db3b7a61","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"958bc8cc7f543667f922b7b06e7a3cfc66e541798bf002cd7f78ecd2db3b7a61","first_computed_at":"2026-05-18T01:20:21.032797Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:20:21.032797Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"a0uo79/rAgo2ah6FNwFaolxWCBuYC+QmkXej0iqi2uZge+wKkHAxVgThplGxOrMKsgrK3WPV9IYqlfB1yeMuCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:20:21.033535Z","signed_message":"canonical_sha256_bytes"},"source_id":"1411.4952","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0dd599c01afa5734687a116b9538014398bf3299365ace3ecdabb1205fb01b12","sha256:1a891a7295eee95da8459263b7d687ccb3775933b0a19b290fb6d8059ece7a75"],"state_sha256":"467422aac6d513fa2313836878c6f8673462bf4af0e5ed2dcbe544d481f8ca2c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CRFDZ//m9IG93t3q16BNp8oUIjfAB3Akl7/i0K0PzyCsvR2Qj/38Zpe6J/E2EW11bdALo0GqYg/dDmNgUuwnDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T19:28:00.717544Z","bundle_sha256":"412c140b95ea9e32ad4ce6b6a6a5bd1922e04335bf298e14d2200605f1908285"}}