{"total":10,"items":[{"citing_arxiv_id":"2607.00375","ref_index":26,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LIST3R: Long-sequence Instance-aware 3D Reconstruction","primary_cat":"cs.CV","submitted_at":"2026-07-01T03:20:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LIST3R reconnects fragmented video subsequences using persistent instance anchors with semantic and geometric evidence to produce consistent global 3D reconstructions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.31488","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DrivingDepth: Sparse-Prompted Pixel-wise Scale Correction for Driving Depth Estimation","primary_cat":"cs.CV","submitted_at":"2026-06-30T11:07:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DrivingDepth achieves SOTA metric depth on nuScenes by residual pixel-wise scale correction on frozen foundation models using sparse LiDAR prompts, preserving geometric consistency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21562","ref_index":55,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Compressing Observation History into Agent Memory: Distilling Transformers into Recurrent Transformers","primary_cat":"cs.CV","submitted_at":"2026-06-19T15:58:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Distillation aligns compression mechanisms between full-history and recurrent transformers, enabling linear-time recurrent memory that narrows the performance gap for streaming vision and robotics tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.20562","ref_index":49,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MemoryWAM: Efficient World Action Modeling with Persistent Memory","primary_cat":"cs.RO","submitted_at":"2026-06-18T17:59:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MemoryWAM is a world action model with a hybrid memory design using recent frames, anchor frames, and gist tokens for efficient long-horizon robotic manipulation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05035","ref_index":29,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Anchor3R: Streaming 3D Reconstruction with Transient Anchors for Long-Horizon Visual Mapping","primary_cat":"cs.CV","submitted_at":"2026-06-03T16:00:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Anchor3R reframes feed-forward 3D reconstruction as current-centric local measurement prediction, using loop-closure and motion averaging to produce coherent global maps from visual streams.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27367","ref_index":118,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SpatialBench: Is Your Spatial Foundation Model an All-Round Player?","primary_cat":"cs.CV","submitted_at":"2026-05-26T17:59:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"SpatialBench evaluates 41 spatial foundation models across 6 paradigms and 5 task suites, finds they are not all-round players, and introduces the DA-Next-5M dataset plus DA-Next baseline model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.26519","ref_index":72,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"$R^3$: 3D Reconstruction via Relative Regression","primary_cat":"cs.CV","submitted_at":"2026-05-26T04:03:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"R³ uses relative regression with confidence-weighted constraints from an MLP to support long-context offline and streaming 3D reconstruction without global coordinate assumptions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17478","ref_index":33,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Mamba-VGGT: Persistent Long-Sequence Video Geometry Grounded Transformer via External Sliding Window Mamba Memory","primary_cat":"cs.CV","submitted_at":"2026-05-17T14:34:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Mamba-VGGT introduces a Sliding Window Mamba memory module and Zero-Init Spatial Memory Injector to enable persistent long-range geometric reasoning in VGGT for extended video sequences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17303","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LongDPM: Overlap-Aware 4D Reconstruction from Long Monocular Videos","primary_cat":"cs.CV","submitted_at":"2026-05-17T07:41:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LongDPM introduces an overlap-aware chunk-based framework that registers and fuses local dynamic reconstructions to achieve coherent long-range 4D geometry and tracking from monocular video.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16981","ref_index":25,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking the State Update Gate for Long-Sequence Recurrent 3D Reconstruction","primary_cat":"cs.CV","submitted_at":"2026-05-16T13:00:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A closed-form scalar frame-level gate α_t derived from internal feature changes extends effective memory in recurrent 3D reconstruction and improves accuracy on long sequences up to 4541 frames.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}