{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:345QV4PTXZL7TF33V6CPMWQTLM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"15b8f30bb571cdd6c6e6cc90edfa601e3ce04f54f935a5145a6de89e396b69cc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-15T16:02:48Z","title_canon_sha256":"cf4c6d90698f5cb344f26f51bcc7e188c0c6986665f14ab2c73a4596dbc0506c"},"schema_version":"1.0","source":{"id":"2605.16120","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16120","created_at":"2026-05-20T00:01:53Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16120v1","created_at":"2026-05-20T00:01:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16120","created_at":"2026-05-20T00:01:53Z"},{"alias_kind":"pith_short_12","alias_value":"345QV4PTXZL7","created_at":"2026-05-20T00:01:53Z"},{"alias_kind":"pith_short_16","alias_value":"345QV4PTXZL7TF33","created_at":"2026-05-20T00:01:53Z"},{"alias_kind":"pith_short_8","alias_value":"345QV4PT","created_at":"2026-05-20T00:01:53Z"}],"graph_snapshots":[{"event_id":"sha256:9ad18e5c26f4586bc0fd87b31b33cf92297abf713aacd8e84bafcf187c189d65","target":"graph","created_at":"2026-05-20T00:01:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"MERVIN achieving 79 out of 88 points in AI Challenge HCMC 2025 qualification phase and successfully retrieved all results for every query in the final round."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That combining keyframes, Gemini-enhanced transcripts, and video summaries via separate visual and textual embeddings will produce meaningfully better semantic retrieval than simpler single-modality baselines for Vietnamese news content."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MERVIN is a multimodal retrieval system for Vietnamese news videos that integrates visual and textual features with LLM-enhanced transcripts and reports strong results on a 2025 AI challenge."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A framework unifies visual frames, enhanced transcripts, and summaries for retrieving events in Vietnamese news videos."}],"snapshot_sha256":"0aa5ef9b043ae2043242f3b668f9a12ba3eff251eeb0d2299d49c536eaa75a71"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"af80d2944a89e3cb518eacc227fb30f7313ac19dbe9036c79796c117239cafa9"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T22:01:30.072404Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T22:01:23.252191Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T17:33:33.850673Z","status":"skipped","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T16:41:55.473902Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.16120/integrity.json","findings":[],"snapshot_sha256":"57cee655147e37de862074a63c7bdb5c9683339d8504185812862b05c1da59c7","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The growth of online video platforms drives the need for effective, semantically grounded event retrieval. We present MERVIN, a unified multimodal framework for Vietnamese news videos that integrates keyframes, transcripts, and video summaries. Transcript quality is enhanced via Gemini 1.5 Flash, reducing noise from accents, background sounds, and recognition errors. Visual features are extracted with Perception Encoder, while a Vietnamese language model produces textual embeddings; both are indexed in Milvus for efficient similarity-based retrieval. In addition, a React-based interface enable","authors_text":"Anh-Duy Le, Anh-Tai Pham-Nguyen, Trung-Hieu Truong-Le, Tung-Duong Le-Duc","cross_cats":[],"headline":"A framework unifies visual frames, enhanced transcripts, and summaries for retrieving events in Vietnamese news videos.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-15T16:02:48Z","title":"MERVIN: A Unified Framework for Multimodal Event Retrieval in Vietnamese News Videos"},"references":{"count":16,"internal_anchors":2,"resolved_work":16,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"In: ICCV (2021)","work_id":"695f8346-dc7c-4278-81fa-91539bc1b8e1","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"In: Proceedings of NAACL-HLT (2019)","work_id":"899d60e1-b12d-4cfc-bb87-f58ec9ef4fa3","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"In: Proceedings of the 14th International Symposium on Information and Communication Technology (SOICT 2025)","work_id":"88bcdf00-5588-46a2-9992-58790c84fdae","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"https://github.com/mlfoundations/open_clip (2021)","work_id":"225b44a9-061d-4e51-826d-b7da3e9f13ec","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"In: NeurIPS (2022)","work_id":"7d93482f-6fa8-4384-a6e1-41f695e67b7d","year":2022}],"snapshot_sha256":"e255e13e1899d55ec0f7497ff0d364c2d9aff292133f47598d0a088d5eff7dd5"},"source":{"id":"2605.16120","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T21:47:33.484565Z","id":"d4e747f7-0c58-4031-9e52-62a05f0f1a43","model_set":{"reader":"grok-4.3"},"one_line_summary":"MERVIN is a multimodal retrieval system for Vietnamese news videos that integrates visual and textual features with LLM-enhanced transcripts and reports strong results on a 2025 AI challenge.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A framework unifies visual frames, enhanced transcripts, and summaries for retrieving events in Vietnamese news videos.","strongest_claim":"MERVIN achieving 79 out of 88 points in AI Challenge HCMC 2025 qualification phase and successfully retrieved all results for every query in the final round.","weakest_assumption":"That combining keyframes, Gemini-enhanced transcripts, and video summaries via separate visual and textual embeddings will produce meaningfully better semantic retrieval than simpler single-modality baselines for Vietnamese news content."}},"verdict_id":"d4e747f7-0c58-4031-9e52-62a05f0f1a43"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cbe710e990f125592f54b256536f9c4bb567a74267855a911e488165a95b7d0b","target":"record","created_at":"2026-05-20T00:01:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"15b8f30bb571cdd6c6e6cc90edfa601e3ce04f54f935a5145a6de89e396b69cc","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2026-05-15T16:02:48Z","title_canon_sha256":"cf4c6d90698f5cb344f26f51bcc7e188c0c6986665f14ab2c73a4596dbc0506c"},"schema_version":"1.0","source":{"id":"2605.16120","kind":"arxiv","version":1}},"canonical_sha256":"df3b0af1f3be57f9977baf84f65a135b24c9c488d058cfb96228afeed66ff06d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"df3b0af1f3be57f9977baf84f65a135b24c9c488d058cfb96228afeed66ff06d","first_computed_at":"2026-05-20T00:01:53.715193Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:01:53.715193Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9DLYNOpjAbI+lg/mMz2lkZfhm9TqNtOmyIDvs8+46TI6FJ813J2WUAWFAVWzNzvL8jJE1MtZces81oHGgBOdAw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:01:53.715925Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.16120","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cbe710e990f125592f54b256536f9c4bb567a74267855a911e488165a95b7d0b","sha256:9ad18e5c26f4586bc0fd87b31b33cf92297abf713aacd8e84bafcf187c189d65"],"state_sha256":"caa41d526f5ae4176b5faf593723aaf8013e0d6fb8739f51156244182db1c9b2"}