{"paper":{"title":"POVQA: Preference-Optimized Video Question Answering with Rationales for Data Efficiency","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.MM"],"primary_cat":"cs.CV","authors_text":"Ankit Ghimire, Ashim Dahal, Nick Rahimi, Saydul Akbar Murad","submitted_at":"2025-10-01T15:15:36Z","abstract_excerpt":"Long-video multimodal question answering requires structured reasoning over visual evidence and dialogue, but Large Vision-Language Models (LVLMs) are constrained by context-window and compute limits. We propose POVQA, which compresses each second into a temporally pooled image (1 fps pooled images) to maintain dense temporal coverage under a fixed token budget. We then train Qwen2.5-VL-7B with supervised fine-tuning (SFT) on rationale+answer targets, and optionally apply Direct Preference Optimization (DPO) for preference alignment. We introduce ReasonVQA as a pilot diagnostic dataset with 12"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.01009","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.01009/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"}