{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:TQLVUJJD3SD3EG4FFUER6WXDEW","short_pith_number":"pith:TQLVUJJD","schema_version":"1.0","canonical_sha256":"9c175a2523dc87b21b852d091f5ae325bf98b9850c9d272a1aa50a5c29ea21f7","source":{"kind":"arxiv","id":"1704.01518","version":1},"attestation_state":"computed","paper":{"title":"Generating Descriptions with Grounded and Co-Referenced People","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Anna Rohrbach, Bernt Schiele, Marcus Rohrbach, Seong Joon Oh, Siyu Tang","submitted_at":"2017-04-05T16:36:13Z","abstract_excerpt":"Learning how to generate descriptions of images or videos received major interest both in the Computer Vision and Natural Language Processing communities. While a few works have proposed to learn a grounding during the generation process in an unsupervised way (via an attention mechanism), it remains unclear how good the quality of the grounding is and whether it benefits the description quality. In this work we propose a movie description model which learns to generate description and jointly ground (localize) the mentioned characters as well as do visual co-reference resolution between pairs"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1704.01518","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-04-05T16:36:13Z","cross_cats_sorted":[],"title_canon_sha256":"61b3d4c89991787b039ee0ae3b766bb0466955471e9307826cee3281d5d30d34","abstract_canon_sha256":"da8df8ea3e56b0ca1d801dded3043bd23093a1404a5cf353e5010d001626b84f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:46:56.252978Z","signature_b64":"1MGn1ZiBsPs1zE53EAD23OPoL+DLBilqSqz5zy1EcfG+vseyEqeyO7CbAq+ywj90Uxt36Vk8E0YeXOXTal1rCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9c175a2523dc87b21b852d091f5ae325bf98b9850c9d272a1aa50a5c29ea21f7","last_reissued_at":"2026-05-18T00:46:56.252452Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:46:56.252452Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Generating Descriptions with Grounded and Co-Referenced People","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Anna Rohrbach, Bernt Schiele, Marcus Rohrbach, Seong Joon Oh, Siyu Tang","submitted_at":"2017-04-05T16:36:13Z","abstract_excerpt":"Learning how to generate descriptions of images or videos received major interest both in the Computer Vision and Natural Language Processing communities. While a few works have proposed to learn a grounding during the generation process in an unsupervised way (via an attention mechanism), it remains unclear how good the quality of the grounding is and whether it benefits the description quality. In this work we propose a movie description model which learns to generate description and jointly ground (localize) the mentioned characters as well as do visual co-reference resolution between pairs"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.01518","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1704.01518","created_at":"2026-05-18T00:46:56.252531+00:00"},{"alias_kind":"arxiv_version","alias_value":"1704.01518v1","created_at":"2026-05-18T00:46:56.252531+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.01518","created_at":"2026-05-18T00:46:56.252531+00:00"},{"alias_kind":"pith_short_12","alias_value":"TQLVUJJD3SD3","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_16","alias_value":"TQLVUJJD3SD3EG4F","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_8","alias_value":"TQLVUJJD","created_at":"2026-05-18T12:31:46.661854+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW","json":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW.json","graph_json":"https://pith.science/api/pith-number/TQLVUJJD3SD3EG4FFUER6WXDEW/graph.json","events_json":"https://pith.science/api/pith-number/TQLVUJJD3SD3EG4FFUER6WXDEW/events.json","paper":"https://pith.science/paper/TQLVUJJD"},"agent_actions":{"view_html":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW","download_json":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW.json","view_paper":"https://pith.science/paper/TQLVUJJD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1704.01518&json=true","fetch_graph":"https://pith.science/api/pith-number/TQLVUJJD3SD3EG4FFUER6WXDEW/graph.json","fetch_events":"https://pith.science/api/pith-number/TQLVUJJD3SD3EG4FFUER6WXDEW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW/action/storage_attestation","attest_author":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW/action/author_attestation","sign_citation":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW/action/citation_signature","submit_replication":"https://pith.science/pith/TQLVUJJD3SD3EG4FFUER6WXDEW/action/replication_record"}},"created_at":"2026-05-18T00:46:56.252531+00:00","updated_at":"2026-05-18T00:46:56.252531+00:00"}