{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:HRTY7XLNR7X2O26K2LOX6A54ME","short_pith_number":"pith:HRTY7XLN","schema_version":"1.0","canonical_sha256":"3c678fdd6d8fefa76bcad2dd7f03bc6102f1c9329df6aa2d8b01a79ef033786f","source":{"kind":"arxiv","id":"1712.01892","version":2},"attestation_state":"computed","paper":{"title":"Grounding Referring Expressions in Images by Variational Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hanwang Zhang, Shih-Fu Chang, Yulei Niu","submitted_at":"2017-12-05T19:57:52Z","abstract_excerpt":"We focus on grounding (i.e., localizing or linking) referring expressions in images, e.g., \"largest elephant standing behind baby elephant\". This is a general yet challenging vision-language task since it does not only require the localization of objects, but also the multimodal comprehension of context --- visual attributes (e.g., \"largest\", \"baby\") and relationships (e.g., \"behind\") that help to distinguish the referent from other objects, especially those of the same category. Due to the exponential complexity involved in modeling the context associated with multiple image regions, existing"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1712.01892","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-12-05T19:57:52Z","cross_cats_sorted":[],"title_canon_sha256":"42da911c76973f68da4a0fc62fe84ca9fbcd8af8d10860b320cd7ad8de9219d1","abstract_canon_sha256":"716bd2d8d2c040a491742d23ad8d067e4079b50534baacecd06a2491c2b49a2e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:40.053333Z","signature_b64":"qQfWkjFauCep21kmJsiVnY0BGsxvcDu4FqsG+KS9ZsMYvKHt8XUUxg5Rf9/Sc0p/lqVPojvl81izm4yO5bLtAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c678fdd6d8fefa76bcad2dd7f03bc6102f1c9329df6aa2d8b01a79ef033786f","last_reissued_at":"2026-05-17T23:41:40.052787Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:40.052787Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Grounding Referring Expressions in Images by Variational Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hanwang Zhang, Shih-Fu Chang, Yulei Niu","submitted_at":"2017-12-05T19:57:52Z","abstract_excerpt":"We focus on grounding (i.e., localizing or linking) referring expressions in images, e.g., \"largest elephant standing behind baby elephant\". This is a general yet challenging vision-language task since it does not only require the localization of objects, but also the multimodal comprehension of context --- visual attributes (e.g., \"largest\", \"baby\") and relationships (e.g., \"behind\") that help to distinguish the referent from other objects, especially those of the same category. Due to the exponential complexity involved in modeling the context associated with multiple image regions, existing"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.01892","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1712.01892","created_at":"2026-05-17T23:41:40.052893+00:00"},{"alias_kind":"arxiv_version","alias_value":"1712.01892v2","created_at":"2026-05-17T23:41:40.052893+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.01892","created_at":"2026-05-17T23:41:40.052893+00:00"},{"alias_kind":"pith_short_12","alias_value":"HRTY7XLNR7X2","created_at":"2026-05-18T12:31:18.294218+00:00"},{"alias_kind":"pith_short_16","alias_value":"HRTY7XLNR7X2O26K","created_at":"2026-05-18T12:31:18.294218+00:00"},{"alias_kind":"pith_short_8","alias_value":"HRTY7XLN","created_at":"2026-05-18T12:31:18.294218+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME","json":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME.json","graph_json":"https://pith.science/api/pith-number/HRTY7XLNR7X2O26K2LOX6A54ME/graph.json","events_json":"https://pith.science/api/pith-number/HRTY7XLNR7X2O26K2LOX6A54ME/events.json","paper":"https://pith.science/paper/HRTY7XLN"},"agent_actions":{"view_html":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME","download_json":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME.json","view_paper":"https://pith.science/paper/HRTY7XLN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1712.01892&json=true","fetch_graph":"https://pith.science/api/pith-number/HRTY7XLNR7X2O26K2LOX6A54ME/graph.json","fetch_events":"https://pith.science/api/pith-number/HRTY7XLNR7X2O26K2LOX6A54ME/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME/action/storage_attestation","attest_author":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME/action/author_attestation","sign_citation":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME/action/citation_signature","submit_replication":"https://pith.science/pith/HRTY7XLNR7X2O26K2LOX6A54ME/action/replication_record"}},"created_at":"2026-05-17T23:41:40.052893+00:00","updated_at":"2026-05-17T23:41:40.052893+00:00"}