{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:XRNC3X3YEDV5H6HFUBRRNSBTCM","short_pith_number":"pith:XRNC3X3Y","schema_version":"1.0","canonical_sha256":"bc5a2ddf7820ebd3f8e5a06316c833130fd4a412f8c7e3295563a0d6da2d9705","source":{"kind":"arxiv","id":"2512.20606","version":2},"attestation_state":"computed","paper":{"title":"Probing and Leveraging Video Diffusion Transformer Features for Robust Point Tracking","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chaehyun Kim, Dahyun Chung, Honggyu An, Hyunah Ko, Jisu Nam, Jung Yi, Junhwa Hur, Seungryong Kim, Siyoon Jin, Soowon Son","submitted_at":"2025-12-23T18:54:10Z","abstract_excerpt":"Despite achieving strong results on standard benchmarks, current point tracking methods rely on feature backbones that are rarely designed with the temporal coherence needed for robust real-world performance. While recent works incorporate powerful visual foundation model (VFM) features into tracking pipelines, no prior work has systematically analyzed which VFM provides the most robust representations for point tracking. We present the first such analysis, evaluating diverse VFMs in a zero-shot setting on both standard and robustness benchmarks for point tracking. Our study reveals that video"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.20606","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-12-23T18:54:10Z","cross_cats_sorted":[],"title_canon_sha256":"80fe8cac0b5226bb7754943c9d27d381cf4b82b28d76ea3747c0efc428a956c4","abstract_canon_sha256":"55f6998e6d346d9d1617455d3529c974f5b78e1fd88b1769f3aef351653f2359"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:17:13.866770Z","signature_b64":"pxyNke2ptpuRxbXXZ6JOqyMKk5pNDeoOv6pmNocTVYokTWbn9PP7Nt1MlX3xjC2Pwf2gZXtntCdLBCl01i0TCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bc5a2ddf7820ebd3f8e5a06316c833130fd4a412f8c7e3295563a0d6da2d9705","last_reissued_at":"2026-06-30T02:17:13.865959Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:17:13.865959Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Probing and Leveraging Video Diffusion Transformer Features for Robust Point Tracking","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chaehyun Kim, Dahyun Chung, Honggyu An, Hyunah Ko, Jisu Nam, Jung Yi, Junhwa Hur, Seungryong Kim, Siyoon Jin, Soowon Son","submitted_at":"2025-12-23T18:54:10Z","abstract_excerpt":"Despite achieving strong results on standard benchmarks, current point tracking methods rely on feature backbones that are rarely designed with the temporal coherence needed for robust real-world performance. While recent works incorporate powerful visual foundation model (VFM) features into tracking pipelines, no prior work has systematically analyzed which VFM provides the most robust representations for point tracking. We present the first such analysis, evaluating diverse VFMs in a zero-shot setting on both standard and robustness benchmarks for point tracking. Our study reveals that video"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.20606","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.20606/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.20606","created_at":"2026-06-30T02:17:13.866066+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.20606v2","created_at":"2026-06-30T02:17:13.866066+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.20606","created_at":"2026-06-30T02:17:13.866066+00:00"},{"alias_kind":"pith_short_12","alias_value":"XRNC3X3YEDV5","created_at":"2026-06-30T02:17:13.866066+00:00"},{"alias_kind":"pith_short_16","alias_value":"XRNC3X3YEDV5H6HF","created_at":"2026-06-30T02:17:13.866066+00:00"},{"alias_kind":"pith_short_8","alias_value":"XRNC3X3Y","created_at":"2026-06-30T02:17:13.866066+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.12587","citing_title":"TrackCraft3R: Repurposing Video Diffusion Transformers for Dense 3D Tracking","ref_index":63,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM","json":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM.json","graph_json":"https://pith.science/api/pith-number/XRNC3X3YEDV5H6HFUBRRNSBTCM/graph.json","events_json":"https://pith.science/api/pith-number/XRNC3X3YEDV5H6HFUBRRNSBTCM/events.json","paper":"https://pith.science/paper/XRNC3X3Y"},"agent_actions":{"view_html":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM","download_json":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM.json","view_paper":"https://pith.science/paper/XRNC3X3Y","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.20606&json=true","fetch_graph":"https://pith.science/api/pith-number/XRNC3X3YEDV5H6HFUBRRNSBTCM/graph.json","fetch_events":"https://pith.science/api/pith-number/XRNC3X3YEDV5H6HFUBRRNSBTCM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM/action/storage_attestation","attest_author":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM/action/author_attestation","sign_citation":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM/action/citation_signature","submit_replication":"https://pith.science/pith/XRNC3X3YEDV5H6HFUBRRNSBTCM/action/replication_record"}},"created_at":"2026-06-30T02:17:13.866066+00:00","updated_at":"2026-06-30T02:17:13.866066+00:00"}