{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:KR6YZGW4OZGL2PRGA5U7PGCMBD","short_pith_number":"pith:KR6YZGW4","schema_version":"1.0","canonical_sha256":"547d8c9adc764cbd3e260769f7984c08c582b415dd83db9ad6aa8d2307aeac7b","source":{"kind":"arxiv","id":"2605.23868","version":1},"attestation_state":"computed","paper":{"title":"Vision Transformers Need Better Token Interaction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Linxiang Su","submitted_at":"2026-05-22T17:25:16Z","abstract_excerpt":"Vision Transformers (ViTs) can learn strong image-level representations while their patch representations become less effective for dense prediction during prolonged training. We revisit this dense degradation phenomenon and argue that it is not fully explained by high-norm artifacts alone. Instead, we characterize \\emph{semantic diffusion}: an optimization shortcut in which global semantic information spreads through patch tokens beyond what is locally justified. Our analysis shows that dense representation quality is not captured by locality alone: shallow features can remain better aligned "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.23868","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-22T17:25:16Z","cross_cats_sorted":[],"title_canon_sha256":"4dbf789ce831efc5a9ff901a812fab38f77d5978f787f2cf8d8bc4d9d54dc6b3","abstract_canon_sha256":"a4b04bb2b85c18a5e34d46dbb416b43a05cd85b4e27d68b61bbcb5af625a0855"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:36.821302Z","signature_b64":"pw8Xf6QpSKH1ribda+u2Tdu9YBqtwCY2LtIFEhVJ0b5H/7oi2cZq64Ell8maTwaGdI5FZ/GZq9sWNqqDeeB3Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"547d8c9adc764cbd3e260769f7984c08c582b415dd83db9ad6aa8d2307aeac7b","last_reissued_at":"2026-05-25T02:02:36.820551Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:36.820551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Vision Transformers Need Better Token Interaction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Linxiang Su","submitted_at":"2026-05-22T17:25:16Z","abstract_excerpt":"Vision Transformers (ViTs) can learn strong image-level representations while their patch representations become less effective for dense prediction during prolonged training. We revisit this dense degradation phenomenon and argue that it is not fully explained by high-norm artifacts alone. Instead, we characterize \\emph{semantic diffusion}: an optimization shortcut in which global semantic information spreads through patch tokens beyond what is locally justified. Our analysis shows that dense representation quality is not captured by locality alone: shallow features can remain better aligned "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23868","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23868/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.23868","created_at":"2026-05-25T02:02:36.820677+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.23868v1","created_at":"2026-05-25T02:02:36.820677+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23868","created_at":"2026-05-25T02:02:36.820677+00:00"},{"alias_kind":"pith_short_12","alias_value":"KR6YZGW4OZGL","created_at":"2026-05-25T02:02:36.820677+00:00"},{"alias_kind":"pith_short_16","alias_value":"KR6YZGW4OZGL2PRG","created_at":"2026-05-25T02:02:36.820677+00:00"},{"alias_kind":"pith_short_8","alias_value":"KR6YZGW4","created_at":"2026-05-25T02:02:36.820677+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD","json":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD.json","graph_json":"https://pith.science/api/pith-number/KR6YZGW4OZGL2PRGA5U7PGCMBD/graph.json","events_json":"https://pith.science/api/pith-number/KR6YZGW4OZGL2PRGA5U7PGCMBD/events.json","paper":"https://pith.science/paper/KR6YZGW4"},"agent_actions":{"view_html":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD","download_json":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD.json","view_paper":"https://pith.science/paper/KR6YZGW4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.23868&json=true","fetch_graph":"https://pith.science/api/pith-number/KR6YZGW4OZGL2PRGA5U7PGCMBD/graph.json","fetch_events":"https://pith.science/api/pith-number/KR6YZGW4OZGL2PRGA5U7PGCMBD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD/action/storage_attestation","attest_author":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD/action/author_attestation","sign_citation":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD/action/citation_signature","submit_replication":"https://pith.science/pith/KR6YZGW4OZGL2PRGA5U7PGCMBD/action/replication_record"}},"created_at":"2026-05-25T02:02:36.820677+00:00","updated_at":"2026-05-25T02:02:36.820677+00:00"}