{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:CBKBG2OKGASCMKGZ5X5SYJCUAC","short_pith_number":"pith:CBKBG2OK","schema_version":"1.0","canonical_sha256":"10541369ca30242628d9edfb2c24540086fbde419f92c518822d0e97b6ca5e16","source":{"kind":"arxiv","id":"1708.02478","version":2},"attestation_state":"computed","paper":{"title":"From Deterministic to Generative: Multi-Modal Stochastic RNNs for Video Captioning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alan Hanjalic, Heng Tao Shen, Jingkuan Song, Lianli Gao, Xuelong Li, Yuyu Guo","submitted_at":"2017-08-08T13:27:13Z","abstract_excerpt":"Video captioning in essential is a complex natural process, which is affected by various uncertainties stemming from video content, subjective judgment, etc. In this paper we build on the recent progress in using encoder-decoder framework for video captioning and address what we find to be a critical deficiency of the existing methods, that most of the decoders propagate deterministic hidden states. Such complex uncertainty cannot be modeled efficiently by the deterministic models. In this paper, we propose a generative approach, referred to as multi-modal stochastic RNNs networks (MS-RNN), wh"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1708.02478","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-08-08T13:27:13Z","cross_cats_sorted":[],"title_canon_sha256":"74ba6f8d5bba7d99a02a7b3321b66441f720c07a261b865e8dce71bb162c6912","abstract_canon_sha256":"5a14ea9932a971ff8be2bfe489191bb11bc3311648487a38bd82d9fb79b38ab0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:32:25.518640Z","signature_b64":"st1pbkObvMF+cSbV80rCybhbtp7HavDIHcWTBOsgyZYE+/XyXrFLhXNgj5+XLY/m8GlQTFO87lpLyeXJ3driBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"10541369ca30242628d9edfb2c24540086fbde419f92c518822d0e97b6ca5e16","last_reissued_at":"2026-05-18T00:32:25.517860Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:32:25.517860Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"From Deterministic to Generative: Multi-Modal Stochastic RNNs for Video Captioning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alan Hanjalic, Heng Tao Shen, Jingkuan Song, Lianli Gao, Xuelong Li, Yuyu Guo","submitted_at":"2017-08-08T13:27:13Z","abstract_excerpt":"Video captioning in essential is a complex natural process, which is affected by various uncertainties stemming from video content, subjective judgment, etc. In this paper we build on the recent progress in using encoder-decoder framework for video captioning and address what we find to be a critical deficiency of the existing methods, that most of the decoders propagate deterministic hidden states. Such complex uncertainty cannot be modeled efficiently by the deterministic models. In this paper, we propose a generative approach, referred to as multi-modal stochastic RNNs networks (MS-RNN), wh"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.02478","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1708.02478","created_at":"2026-05-18T00:32:25.517987+00:00"},{"alias_kind":"arxiv_version","alias_value":"1708.02478v2","created_at":"2026-05-18T00:32:25.517987+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.02478","created_at":"2026-05-18T00:32:25.517987+00:00"},{"alias_kind":"pith_short_12","alias_value":"CBKBG2OKGASC","created_at":"2026-05-18T12:31:10.602751+00:00"},{"alias_kind":"pith_short_16","alias_value":"CBKBG2OKGASCMKGZ","created_at":"2026-05-18T12:31:10.602751+00:00"},{"alias_kind":"pith_short_8","alias_value":"CBKBG2OK","created_at":"2026-05-18T12:31:10.602751+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC","json":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC.json","graph_json":"https://pith.science/api/pith-number/CBKBG2OKGASCMKGZ5X5SYJCUAC/graph.json","events_json":"https://pith.science/api/pith-number/CBKBG2OKGASCMKGZ5X5SYJCUAC/events.json","paper":"https://pith.science/paper/CBKBG2OK"},"agent_actions":{"view_html":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC","download_json":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC.json","view_paper":"https://pith.science/paper/CBKBG2OK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1708.02478&json=true","fetch_graph":"https://pith.science/api/pith-number/CBKBG2OKGASCMKGZ5X5SYJCUAC/graph.json","fetch_events":"https://pith.science/api/pith-number/CBKBG2OKGASCMKGZ5X5SYJCUAC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC/action/storage_attestation","attest_author":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC/action/author_attestation","sign_citation":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC/action/citation_signature","submit_replication":"https://pith.science/pith/CBKBG2OKGASCMKGZ5X5SYJCUAC/action/replication_record"}},"created_at":"2026-05-18T00:32:25.517987+00:00","updated_at":"2026-05-18T00:32:25.517987+00:00"}