{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:67O7667FZH2TVZLYCNHKARVZ6D","short_pith_number":"pith:67O7667F","schema_version":"1.0","canonical_sha256":"f7ddff7be5c9f53ae578134ea046b9f0e8e7c72ab844ca87b48900e69afb248f","source":{"kind":"arxiv","id":"1610.02947","version":3},"attestation_state":"computed","paper":{"title":"End-to-end Concept Word Detection for Video Captioning, Retrieval, and Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Gunhee Kim, Hyungjin Ko, Jongwook Choi, Youngjae Yu","submitted_at":"2016-10-10T15:03:15Z","abstract_excerpt":"We propose a high-level concept word detector that can be integrated with any video-to-language models. It takes a video as input and generates a list of concept words as useful semantic priors for language generation models. The proposed word detector has two important properties. First, it does not require any external knowledge sources for training. Second, the proposed word detector is trainable in an end-to-end manner jointly with any video-to-language models. To maximize the values of detected words, we also develop a semantic attention mechanism that selectively focuses on the detected "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1610.02947","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-10T15:03:15Z","cross_cats_sorted":[],"title_canon_sha256":"a8339f6f6cc94b9c75c4fd4d2d1f6cf5516cc0a43cfe71fd455e85db7cd690f5","abstract_canon_sha256":"c90de6856ac614a3cae2d517014e71003b9f3b8927dea75aa40f39419519dcd0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:39:40.034993Z","signature_b64":"1poqLAmf+h+p2a8lUQ0k5bAljlPgsc9/p25vHE8JiP+rIABvwgZzxpaOW2DQ1BnN4DbdjYcu4++sbR8D88UfBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f7ddff7be5c9f53ae578134ea046b9f0e8e7c72ab844ca87b48900e69afb248f","last_reissued_at":"2026-05-18T00:39:40.034468Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:39:40.034468Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"End-to-end Concept Word Detection for Video Captioning, Retrieval, and Question Answering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Gunhee Kim, Hyungjin Ko, Jongwook Choi, Youngjae Yu","submitted_at":"2016-10-10T15:03:15Z","abstract_excerpt":"We propose a high-level concept word detector that can be integrated with any video-to-language models. It takes a video as input and generates a list of concept words as useful semantic priors for language generation models. The proposed word detector has two important properties. First, it does not require any external knowledge sources for training. Second, the proposed word detector is trainable in an end-to-end manner jointly with any video-to-language models. To maximize the values of detected words, we also develop a semantic attention mechanism that selectively focuses on the detected "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1610.02947","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1610.02947","created_at":"2026-05-18T00:39:40.034567+00:00"},{"alias_kind":"arxiv_version","alias_value":"1610.02947v3","created_at":"2026-05-18T00:39:40.034567+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1610.02947","created_at":"2026-05-18T00:39:40.034567+00:00"},{"alias_kind":"pith_short_12","alias_value":"67O7667FZH2T","created_at":"2026-05-18T12:30:01.593930+00:00"},{"alias_kind":"pith_short_16","alias_value":"67O7667FZH2TVZLY","created_at":"2026-05-18T12:30:01.593930+00:00"},{"alias_kind":"pith_short_8","alias_value":"67O7667F","created_at":"2026-05-18T12:30:01.593930+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2309.16671","citing_title":"Demystifying CLIP Data","ref_index":179,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D","json":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D.json","graph_json":"https://pith.science/api/pith-number/67O7667FZH2TVZLYCNHKARVZ6D/graph.json","events_json":"https://pith.science/api/pith-number/67O7667FZH2TVZLYCNHKARVZ6D/events.json","paper":"https://pith.science/paper/67O7667F"},"agent_actions":{"view_html":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D","download_json":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D.json","view_paper":"https://pith.science/paper/67O7667F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1610.02947&json=true","fetch_graph":"https://pith.science/api/pith-number/67O7667FZH2TVZLYCNHKARVZ6D/graph.json","fetch_events":"https://pith.science/api/pith-number/67O7667FZH2TVZLYCNHKARVZ6D/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D/action/timestamp_anchor","attest_storage":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D/action/storage_attestation","attest_author":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D/action/author_attestation","sign_citation":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D/action/citation_signature","submit_replication":"https://pith.science/pith/67O7667FZH2TVZLYCNHKARVZ6D/action/replication_record"}},"created_at":"2026-05-18T00:39:40.034567+00:00","updated_at":"2026-05-18T00:39:40.034567+00:00"}