{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:OLN65HNDSZQRGOT42G7BUYYHBY","short_pith_number":"pith:OLN65HND","schema_version":"1.0","canonical_sha256":"72dbee9da39661133a7cd1be1a63070e0817627a0bf1dd627e519e0135e067ba","source":{"kind":"arxiv","id":"2411.14279","version":2},"attestation_state":"computed","paper":{"title":"Looking Beyond Text: Reducing Language bias in Large Vision-Language Models via Multimodal Dual-Attention and Soft-Image Guidance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Baobao Chang, Haozhe Zhao, Liang Chen, Maosong Sun, Mingjia Zhang, Shuzheng Si, Yichi Zhang","submitted_at":"2024-11-21T16:33:30Z","abstract_excerpt":"Large vision-language models (LVLMs) have achieved impressive results in various vision-language tasks. However, despite showing promising performance, LVLMs suffer from hallucinations caused by language bias, leading to diminished focus on images and ineffective visual comprehension. We identify two primary reasons for this bias: 1. Different scales of training data between the pretraining stage of LLM and multimodal alignment stage. 2. The learned inference bias due to short-term dependency of text data. Therefore, we propose LACING, a systemic framework designed to address the language bias"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2411.14279","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2024-11-21T16:33:30Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"3dfc487d9c1fd7b51b76da4b075700d7e4bb019666c19f0ae5befa5d078e6b89","abstract_canon_sha256":"a01915e9a8f252c4b33799c80efee599b83d7cdb0c6a2e34cbf08b1560eea0f3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:04:51.585227Z","signature_b64":"Y8toakb3AFXhj0i6fNNL0627LmEYEh+xCVQ54aNMqknqmAApHo/oiIN8RXkDL0s4zOmWb9Z/IoIfIXkNgFEgAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72dbee9da39661133a7cd1be1a63070e0817627a0bf1dd627e519e0135e067ba","last_reissued_at":"2026-05-29T01:04:51.584750Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:04:51.584750Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Looking Beyond Text: Reducing Language bias in Large Vision-Language Models via Multimodal Dual-Attention and Soft-Image Guidance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Baobao Chang, Haozhe Zhao, Liang Chen, Maosong Sun, Mingjia Zhang, Shuzheng Si, Yichi Zhang","submitted_at":"2024-11-21T16:33:30Z","abstract_excerpt":"Large vision-language models (LVLMs) have achieved impressive results in various vision-language tasks. However, despite showing promising performance, LVLMs suffer from hallucinations caused by language bias, leading to diminished focus on images and ineffective visual comprehension. We identify two primary reasons for this bias: 1. Different scales of training data between the pretraining stage of LLM and multimodal alignment stage. 2. The learned inference bias due to short-term dependency of text data. Therefore, we propose LACING, a systemic framework designed to address the language bias"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2411.14279","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2411.14279/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2411.14279","created_at":"2026-05-29T01:04:51.584807+00:00"},{"alias_kind":"arxiv_version","alias_value":"2411.14279v2","created_at":"2026-05-29T01:04:51.584807+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2411.14279","created_at":"2026-05-29T01:04:51.584807+00:00"},{"alias_kind":"pith_short_12","alias_value":"OLN65HNDSZQR","created_at":"2026-05-29T01:04:51.584807+00:00"},{"alias_kind":"pith_short_16","alias_value":"OLN65HNDSZQRGOT4","created_at":"2026-05-29T01:04:51.584807+00:00"},{"alias_kind":"pith_short_8","alias_value":"OLN65HND","created_at":"2026-05-29T01:04:51.584807+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY","json":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY.json","graph_json":"https://pith.science/api/pith-number/OLN65HNDSZQRGOT42G7BUYYHBY/graph.json","events_json":"https://pith.science/api/pith-number/OLN65HNDSZQRGOT42G7BUYYHBY/events.json","paper":"https://pith.science/paper/OLN65HND"},"agent_actions":{"view_html":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY","download_json":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY.json","view_paper":"https://pith.science/paper/OLN65HND","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2411.14279&json=true","fetch_graph":"https://pith.science/api/pith-number/OLN65HNDSZQRGOT42G7BUYYHBY/graph.json","fetch_events":"https://pith.science/api/pith-number/OLN65HNDSZQRGOT42G7BUYYHBY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY/action/storage_attestation","attest_author":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY/action/author_attestation","sign_citation":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY/action/citation_signature","submit_replication":"https://pith.science/pith/OLN65HNDSZQRGOT42G7BUYYHBY/action/replication_record"}},"created_at":"2026-05-29T01:04:51.584807+00:00","updated_at":"2026-05-29T01:04:51.584807+00:00"}