{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6JIUDJHVFPEHROHL5IIUJOKCDU","short_pith_number":"pith:6JIUDJHV","schema_version":"1.0","canonical_sha256":"f25141a4f52bc878b8ebea1144b9421d350481a032a5e56b69e4ab80b6669ea1","source":{"kind":"arxiv","id":"2605.17447","version":1},"attestation_state":"computed","paper":{"title":"FastOCR: Dynamic Visual Fixation via KV Cache Pruning for Efficient Document Parsing","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Ao Wang, Ben Wan, Guiguang Ding, Hui Chen, Ke Zhang, Leqi Shen, Sicheng Zhao, Tongxuan Liu, Yan Feng, Zihan Tang","submitted_at":"2026-05-17T13:39:47Z","abstract_excerpt":"Vision-Language Models (VLMs) have shown strong promise on Optical Character Recognition (OCR), yet the sheer number of visual tokens required to encode dense documents incurs prohibitive inference cost. Existing pruning methods rely on physical eviction, e.g., permanently discarding visual tokens during the prefill stage. While effective for natural images, this strategy fundamentally breaks down on OCR, where virtually every visual token may correspond to a character or structural element, and any irreversible loss leads to catastrophic accuracy degradation. We observe that, although documen"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.17447","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-17T13:39:47Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"8dec45d369813b80bebf23bbb6b7f3883bf95a80bd7232118337f5ee96110a29","abstract_canon_sha256":"2dd5b02f7c4df7423e41868d998ee100691db0651204f638b9b4682d10507c52"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:39.440606Z","signature_b64":"bsE18LDDz5ugMib6MuBSY3wAlOaFJrdtItyGr1Av8qlFVSuoblznrmySbYZsbzUsFMujYFPA36gt5LNgVcf8Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f25141a4f52bc878b8ebea1144b9421d350481a032a5e56b69e4ab80b6669ea1","last_reissued_at":"2026-05-20T00:04:39.439455Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:39.439455Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"FastOCR: Dynamic Visual Fixation via KV Cache Pruning for Efficient Document Parsing","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Ao Wang, Ben Wan, Guiguang Ding, Hui Chen, Ke Zhang, Leqi Shen, Sicheng Zhao, Tongxuan Liu, Yan Feng, Zihan Tang","submitted_at":"2026-05-17T13:39:47Z","abstract_excerpt":"Vision-Language Models (VLMs) have shown strong promise on Optical Character Recognition (OCR), yet the sheer number of visual tokens required to encode dense documents incurs prohibitive inference cost. Existing pruning methods rely on physical eviction, e.g., permanently discarding visual tokens during the prefill stage. While effective for natural images, this strategy fundamentally breaks down on OCR, where virtually every visual token may correspond to a character or structural element, and any irreversible loss leads to catastrophic accuracy degradation. We observe that, although documen"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17447","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17447/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T21:41:57.717052Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.668636Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"9d462a5ab611476bd171a0362ad0bbc80ee32f0ce3047a2bddc0c6ffbf28ecf6"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.17447","created_at":"2026-05-20T00:04:39.439604+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.17447v1","created_at":"2026-05-20T00:04:39.439604+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17447","created_at":"2026-05-20T00:04:39.439604+00:00"},{"alias_kind":"pith_short_12","alias_value":"6JIUDJHVFPEH","created_at":"2026-05-20T00:04:39.439604+00:00"},{"alias_kind":"pith_short_16","alias_value":"6JIUDJHVFPEHROHL","created_at":"2026-05-20T00:04:39.439604+00:00"},{"alias_kind":"pith_short_8","alias_value":"6JIUDJHV","created_at":"2026-05-20T00:04:39.439604+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.00392","citing_title":"RTPrune: Reading-Twice Inspired Token Pruning for Efficient DeepSeek-OCR Inference","ref_index":15,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU","json":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU.json","graph_json":"https://pith.science/api/pith-number/6JIUDJHVFPEHROHL5IIUJOKCDU/graph.json","events_json":"https://pith.science/api/pith-number/6JIUDJHVFPEHROHL5IIUJOKCDU/events.json","paper":"https://pith.science/paper/6JIUDJHV"},"agent_actions":{"view_html":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU","download_json":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU.json","view_paper":"https://pith.science/paper/6JIUDJHV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.17447&json=true","fetch_graph":"https://pith.science/api/pith-number/6JIUDJHVFPEHROHL5IIUJOKCDU/graph.json","fetch_events":"https://pith.science/api/pith-number/6JIUDJHVFPEHROHL5IIUJOKCDU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU/action/storage_attestation","attest_author":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU/action/author_attestation","sign_citation":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU/action/citation_signature","submit_replication":"https://pith.science/pith/6JIUDJHVFPEHROHL5IIUJOKCDU/action/replication_record"}},"created_at":"2026-05-20T00:04:39.439604+00:00","updated_at":"2026-05-20T00:04:39.439604+00:00"}