{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:EHO3VINHDZYS6YOAVDDPLSNIIJ","short_pith_number":"pith:EHO3VINH","canonical_record":{"source":{"id":"2606.05753","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-04T06:26:18Z","cross_cats_sorted":[],"title_canon_sha256":"ccf90308bb8dcfae4e237ef0afbbbd73b43e6860d0e5278f150ffec55143a46e","abstract_canon_sha256":"916c183573709df1a3e10652b5db526b866b0b6b7cc729ea8a04a845472e6fa9"},"schema_version":"1.0"},"canonical_sha256":"21ddbaa1a71e712f61c0a8c6f5c9a8427203aa7d537aa9e13397b3ecfcd0f7bd","source":{"kind":"arxiv","id":"2606.05753","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.05753","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"arxiv_version","alias_value":"2606.05753v1","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05753","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_12","alias_value":"EHO3VINHDZYS","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_16","alias_value":"EHO3VINHDZYS6YOA","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_8","alias_value":"EHO3VINH","created_at":"2026-06-05T01:15:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:EHO3VINHDZYS6YOAVDDPLSNIIJ","target":"record","payload":{"canonical_record":{"source":{"id":"2606.05753","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-04T06:26:18Z","cross_cats_sorted":[],"title_canon_sha256":"ccf90308bb8dcfae4e237ef0afbbbd73b43e6860d0e5278f150ffec55143a46e","abstract_canon_sha256":"916c183573709df1a3e10652b5db526b866b0b6b7cc729ea8a04a845472e6fa9"},"schema_version":"1.0"},"canonical_sha256":"21ddbaa1a71e712f61c0a8c6f5c9a8427203aa7d537aa9e13397b3ecfcd0f7bd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:01.776062Z","signature_b64":"tGgdI/I09E5ZuFXs8FnCiCIwID44ABO6rJ8hN51ajyzo2wQ3i9h9SeR73aCJDTt5C9Lstfhimn6GX++KZN5JBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"21ddbaa1a71e712f61c0a8c6f5c9a8427203aa7d537aa9e13397b3ecfcd0f7bd","last_reissued_at":"2026-06-05T01:15:01.775342Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:01.775342Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.05753","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MhAwg9EOXYVNLrfMKDc6MkFDIY4pNV/ip69VkEwDqhtVCNs4pq6yS3jr07Z87vMa+3oH5Uv+ok6JpcwXmi8hCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T12:14:46.557469Z"},"content_sha256":"06bf5066a3f5e2de190c22944c5d2ee1877c9a9092091c5e565a1789faa87e48","schema_version":"1.0","event_id":"sha256:06bf5066a3f5e2de190c22944c5d2ee1877c9a9092091c5e565a1789faa87e48"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:EHO3VINHDZYS6YOAVDDPLSNIIJ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Cosine Misleads: Auxiliary Losses Reshape Vision Language Models, Not Their Latents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Junfeng Fang, XiuYu Zhang, Zhenkai Liang","submitted_at":"2026-06-04T06:26:18Z","abstract_excerpt":"Latent visual reasoning (LVR) inserts supervised latent tokens between perception and answer generation in vision-language models (VLMs). The field uses alignment between these latents and their visual targets, i.e., cosine similarity or mean squared error (MSE), as both the training loss and the quality metric, assuming that better alignment yields a better answer. We test this with a designed matrix of five LVR variants and find the assumption inverted: cosine alignment is negatively correlated with accuracy across all five (r=-0.94). To explain this, we introduce PRISM, a pair of inference-"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05753","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.05753/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EoCxahhRxkxy9Pl9BGYAV0sWdSGCJiGvj3bX5ilGA1j4ghKCQnsdC2tMtH95+zyPSDaFuTg++9EwZnCEQqFRCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-28T12:14:46.557832Z"},"content_sha256":"9fba027588184f1cfa69ed80196e8b606f2c3549aeb833f99f0206e67e66a631","schema_version":"1.0","event_id":"sha256:9fba027588184f1cfa69ed80196e8b606f2c3549aeb833f99f0206e67e66a631"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/bundle.json","state_url":"https://pith.science/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-28T12:14:46Z","links":{"resolver":"https://pith.science/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ","bundle":"https://pith.science/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/bundle.json","state":"https://pith.science/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EHO3VINHDZYS6YOAVDDPLSNIIJ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EHO3VINHDZYS6YOAVDDPLSNIIJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"916c183573709df1a3e10652b5db526b866b0b6b7cc729ea8a04a845472e6fa9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-04T06:26:18Z","title_canon_sha256":"ccf90308bb8dcfae4e237ef0afbbbd73b43e6860d0e5278f150ffec55143a46e"},"schema_version":"1.0","source":{"id":"2606.05753","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.05753","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"arxiv_version","alias_value":"2606.05753v1","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05753","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_12","alias_value":"EHO3VINHDZYS","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_16","alias_value":"EHO3VINHDZYS6YOA","created_at":"2026-06-05T01:15:01Z"},{"alias_kind":"pith_short_8","alias_value":"EHO3VINH","created_at":"2026-06-05T01:15:01Z"}],"graph_snapshots":[{"event_id":"sha256:9fba027588184f1cfa69ed80196e8b606f2c3549aeb833f99f0206e67e66a631","target":"graph","created_at":"2026-06-05T01:15:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.05753/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Latent visual reasoning (LVR) inserts supervised latent tokens between perception and answer generation in vision-language models (VLMs). The field uses alignment between these latents and their visual targets, i.e., cosine similarity or mean squared error (MSE), as both the training loss and the quality metric, assuming that better alignment yields a better answer. We test this with a designed matrix of five LVR variants and find the assumption inverted: cosine alignment is negatively correlated with accuracy across all five (r=-0.94). To explain this, we introduce PRISM, a pair of inference-","authors_text":"Junfeng Fang, XiuYu Zhang, Zhenkai Liang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-04T06:26:18Z","title":"Cosine Misleads: Auxiliary Losses Reshape Vision Language Models, Not Their Latents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05753","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:06bf5066a3f5e2de190c22944c5d2ee1877c9a9092091c5e565a1789faa87e48","target":"record","created_at":"2026-06-05T01:15:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"916c183573709df1a3e10652b5db526b866b0b6b7cc729ea8a04a845472e6fa9","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-04T06:26:18Z","title_canon_sha256":"ccf90308bb8dcfae4e237ef0afbbbd73b43e6860d0e5278f150ffec55143a46e"},"schema_version":"1.0","source":{"id":"2606.05753","kind":"arxiv","version":1}},"canonical_sha256":"21ddbaa1a71e712f61c0a8c6f5c9a8427203aa7d537aa9e13397b3ecfcd0f7bd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"21ddbaa1a71e712f61c0a8c6f5c9a8427203aa7d537aa9e13397b3ecfcd0f7bd","first_computed_at":"2026-06-05T01:15:01.775342Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:15:01.775342Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tGgdI/I09E5ZuFXs8FnCiCIwID44ABO6rJ8hN51ajyzo2wQ3i9h9SeR73aCJDTt5C9Lstfhimn6GX++KZN5JBw==","signature_status":"signed_v1","signed_at":"2026-06-05T01:15:01.776062Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.05753","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:06bf5066a3f5e2de190c22944c5d2ee1877c9a9092091c5e565a1789faa87e48","sha256:9fba027588184f1cfa69ed80196e8b606f2c3549aeb833f99f0206e67e66a631"],"state_sha256":"ba2ade9aca79243eef7317dc7793876899fb8399593da67d9223d66ddc6e9c96"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"j8y3rDQlBTbmxlb3FMp5Hbx6mRdPBfVdif5kC5GBvrqUBE6DSUVNoZIYwKg50rCovib+s8nTWJZQT/EqlpQvDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-28T12:14:46.560051Z","bundle_sha256":"4d6b013a5028e336799a3d1e0a60d875712973546d15b8d313497249601e52b0"}}