{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:QKU6L4UJROKMEFQMFXGL655LEW","short_pith_number":"pith:QKU6L4UJ","canonical_record":{"source":{"id":"1808.08732","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T08:37:42Z","cross_cats_sorted":[],"title_canon_sha256":"d857aa44e9e794e61154a906e34b37c4ce65953628d25e6c40517448d60f02f3","abstract_canon_sha256":"a5563c88b7256a290905de68cce9c8de08be0c76350659d141a8cc5943dd50e2"},"schema_version":"1.0"},"canonical_sha256":"82a9e5f2898b94c2160c2dccbf77ab25be75c1ccb99ddfc7b8047b00d7207376","source":{"kind":"arxiv","id":"1808.08732","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.08732","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"arxiv_version","alias_value":"1808.08732v1","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.08732","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"pith_short_12","alias_value":"QKU6L4UJROKM","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"QKU6L4UJROKMEFQM","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"QKU6L4UJ","created_at":"2026-05-18T12:32:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:QKU6L4UJROKMEFQMFXGL655LEW","target":"record","payload":{"canonical_record":{"source":{"id":"1808.08732","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T08:37:42Z","cross_cats_sorted":[],"title_canon_sha256":"d857aa44e9e794e61154a906e34b37c4ce65953628d25e6c40517448d60f02f3","abstract_canon_sha256":"a5563c88b7256a290905de68cce9c8de08be0c76350659d141a8cc5943dd50e2"},"schema_version":"1.0"},"canonical_sha256":"82a9e5f2898b94c2160c2dccbf77ab25be75c1ccb99ddfc7b8047b00d7207376","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:07:13.427347Z","signature_b64":"C2XQZccDmQKisFJ2ADbZMko19v4Hnou5dr3E55ZqPBN3cBPyJ9UpPGNH0eZA4M+bYqrQulr/UCgQyw/qp1PiCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"82a9e5f2898b94c2160c2dccbf77ab25be75c1ccb99ddfc7b8047b00d7207376","last_reissued_at":"2026-05-18T00:07:13.426570Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:07:13.426570Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1808.08732","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:07:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"itxL2R5YTX3j75FCRD8ExKutHIyIhx/NZM4PWKmX2mp3R87UGyo/6yGveHYqsyqaGZX2sNlTZrzBnglBvRUfDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:47:04.693348Z"},"content_sha256":"27c6de19b9d9d9ec37edc295327300b37c912850b97c4d389b121bb9a334cef5","schema_version":"1.0","event_id":"sha256:27c6de19b9d9d9ec37edc295327300b37c912850b97c4d389b121bb9a334cef5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:QKU6L4UJROKMEFQMFXGL655LEW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"simNet: Stepwise Image-Topic Merging Network for Generating Detailed and Comprehensive Image Captions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fenglin Liu, Houfeng Wang, Xuancheng Ren, Xu Sun, Yuanxin Liu","submitted_at":"2018-08-27T08:37:42Z","abstract_excerpt":"The encode-decoder framework has shown recent success in image captioning. Visual attention, which is good at detailedness, and semantic attention, which is good at comprehensiveness, have been separately proposed to ground the caption on the image. In this paper, we propose the Stepwise Image-Topic Merging Network (simNet) that makes use of the two kinds of attention at the same time. At each time step when generating the caption, the decoder adaptively merges the attentive information in the extracted topics and the image according to the generated context, so that the visual information and"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.08732","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:07:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"U12GUOuVktgIHZx3gldGq+dqmFjF07bl81Ezi53DwcsdVnw8VlLnxjr1+WLqSUekwUgEOZZt5jLDuQncF/QYBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:47:04.693712Z"},"content_sha256":"2a28903e48e74c6091b80fe176fc3c4fec7b5c43cc43ec999259dff5fdde1238","schema_version":"1.0","event_id":"sha256:2a28903e48e74c6091b80fe176fc3c4fec7b5c43cc43ec999259dff5fdde1238"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QKU6L4UJROKMEFQMFXGL655LEW/bundle.json","state_url":"https://pith.science/pith/QKU6L4UJROKMEFQMFXGL655LEW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QKU6L4UJROKMEFQMFXGL655LEW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T12:47:04Z","links":{"resolver":"https://pith.science/pith/QKU6L4UJROKMEFQMFXGL655LEW","bundle":"https://pith.science/pith/QKU6L4UJROKMEFQMFXGL655LEW/bundle.json","state":"https://pith.science/pith/QKU6L4UJROKMEFQMFXGL655LEW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QKU6L4UJROKMEFQMFXGL655LEW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:QKU6L4UJROKMEFQMFXGL655LEW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a5563c88b7256a290905de68cce9c8de08be0c76350659d141a8cc5943dd50e2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T08:37:42Z","title_canon_sha256":"d857aa44e9e794e61154a906e34b37c4ce65953628d25e6c40517448d60f02f3"},"schema_version":"1.0","source":{"id":"1808.08732","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1808.08732","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"arxiv_version","alias_value":"1808.08732v1","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.08732","created_at":"2026-05-18T00:07:13Z"},{"alias_kind":"pith_short_12","alias_value":"QKU6L4UJROKM","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"QKU6L4UJROKMEFQM","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"QKU6L4UJ","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:2a28903e48e74c6091b80fe176fc3c4fec7b5c43cc43ec999259dff5fdde1238","target":"graph","created_at":"2026-05-18T00:07:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The encode-decoder framework has shown recent success in image captioning. Visual attention, which is good at detailedness, and semantic attention, which is good at comprehensiveness, have been separately proposed to ground the caption on the image. In this paper, we propose the Stepwise Image-Topic Merging Network (simNet) that makes use of the two kinds of attention at the same time. At each time step when generating the caption, the decoder adaptively merges the attentive information in the extracted topics and the image according to the generated context, so that the visual information and","authors_text":"Fenglin Liu, Houfeng Wang, Xuancheng Ren, Xu Sun, Yuanxin Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T08:37:42Z","title":"simNet: Stepwise Image-Topic Merging Network for Generating Detailed and Comprehensive Image Captions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.08732","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:27c6de19b9d9d9ec37edc295327300b37c912850b97c4d389b121bb9a334cef5","target":"record","created_at":"2026-05-18T00:07:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a5563c88b7256a290905de68cce9c8de08be0c76350659d141a8cc5943dd50e2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-08-27T08:37:42Z","title_canon_sha256":"d857aa44e9e794e61154a906e34b37c4ce65953628d25e6c40517448d60f02f3"},"schema_version":"1.0","source":{"id":"1808.08732","kind":"arxiv","version":1}},"canonical_sha256":"82a9e5f2898b94c2160c2dccbf77ab25be75c1ccb99ddfc7b8047b00d7207376","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82a9e5f2898b94c2160c2dccbf77ab25be75c1ccb99ddfc7b8047b00d7207376","first_computed_at":"2026-05-18T00:07:13.426570Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:07:13.426570Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"C2XQZccDmQKisFJ2ADbZMko19v4Hnou5dr3E55ZqPBN3cBPyJ9UpPGNH0eZA4M+bYqrQulr/UCgQyw/qp1PiCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:07:13.427347Z","signed_message":"canonical_sha256_bytes"},"source_id":"1808.08732","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:27c6de19b9d9d9ec37edc295327300b37c912850b97c4d389b121bb9a334cef5","sha256:2a28903e48e74c6091b80fe176fc3c4fec7b5c43cc43ec999259dff5fdde1238"],"state_sha256":"bd253e8c903708bd33bcf80a3c5bad4fec81869946f54cc0e8e9cf8dbab3f47e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"abwzLvJJRNmiMf1NJGCj0V+WdXnkpdVuqvanJ83b6l7gkzrRYnN8Ln7+kO5kM/12A9K50nXB338l8Xjif1UwAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T12:47:04.695743Z","bundle_sha256":"9e0a0cd5231645b762a488710cd27f33782f3b21bfcb71912abfa616684b1bdc"}}