{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:G5GH7ARRQUN6G6MYXVAQCVEA3U","short_pith_number":"pith:G5GH7ARR","schema_version":"1.0","canonical_sha256":"374c7f8231851be37998bd41015480dd2e1a777c19003b06172dcbe42f2fd8b0","source":{"kind":"arxiv","id":"2606.16682","version":2},"attestation_state":"computed","paper":{"title":"Multimodal Evaluator Preference Collapse: Cross-Modal Contagion in Self-Evolving Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Zewen Liu","submitted_at":"2026-06-15T13:18:20Z","abstract_excerpt":"When AI agents use language models to evaluate their own outputs in a\n  feedback loop, systematic biases emerge. We show that Evaluator Preference\n  Collapse (EPC) is dramatically amplified in multimodal settings. Using\n  GPT-4o to evaluate DeepSeek-chat across text and visual tasks, we find\n  that a single strategy (step_by_step) absorbs 48.4% of all weight -- 3.2x\n  the collapse observed in text-only self-evaluation -- while three\n  visual-domain strategies receive only 9.1% combined weight. We then\n  demonstrate a novel phenomenon we term cross-modal contagion: evaluator\n  preferences acqui"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.16682","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-15T13:18:20Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"fc1417405177012e73e2129833a2c415d00d5d9d8350d4ce2047816c3edff074","abstract_canon_sha256":"077161f4faa9ca5c1fc00ab7adcd0b9eb68eeb64ca6699f902987d4d161fd34e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:56.487022Z","signature_b64":"iGuYmzLCX4NHa/EpdQ7LziSYtf686CCQAqjVxZGUto4uYoxkt69r1pG5g1734n26igs1jHFd9sxceBx8I+pbCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"374c7f8231851be37998bd41015480dd2e1a777c19003b06172dcbe42f2fd8b0","last_reissued_at":"2026-06-19T16:12:56.486656Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:56.486656Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Multimodal Evaluator Preference Collapse: Cross-Modal Contagion in Self-Evolving Agents","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Zewen Liu","submitted_at":"2026-06-15T13:18:20Z","abstract_excerpt":"When AI agents use language models to evaluate their own outputs in a\n  feedback loop, systematic biases emerge. We show that Evaluator Preference\n  Collapse (EPC) is dramatically amplified in multimodal settings. Using\n  GPT-4o to evaluate DeepSeek-chat across text and visual tasks, we find\n  that a single strategy (step_by_step) absorbs 48.4% of all weight -- 3.2x\n  the collapse observed in text-only self-evaluation -- while three\n  visual-domain strategies receive only 9.1% combined weight. We then\n  demonstrate a novel phenomenon we term cross-modal contagion: evaluator\n  preferences acqui"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.16682","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.16682/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.16682","created_at":"2026-06-19T16:12:56.486718+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.16682v2","created_at":"2026-06-19T16:12:56.486718+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.16682","created_at":"2026-06-19T16:12:56.486718+00:00"},{"alias_kind":"pith_short_12","alias_value":"G5GH7ARRQUN6","created_at":"2026-06-19T16:12:56.486718+00:00"},{"alias_kind":"pith_short_16","alias_value":"G5GH7ARRQUN6G6MY","created_at":"2026-06-19T16:12:56.486718+00:00"},{"alias_kind":"pith_short_8","alias_value":"G5GH7ARR","created_at":"2026-06-19T16:12:56.486718+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U","json":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U.json","graph_json":"https://pith.science/api/pith-number/G5GH7ARRQUN6G6MYXVAQCVEA3U/graph.json","events_json":"https://pith.science/api/pith-number/G5GH7ARRQUN6G6MYXVAQCVEA3U/events.json","paper":"https://pith.science/paper/G5GH7ARR"},"agent_actions":{"view_html":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U","download_json":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U.json","view_paper":"https://pith.science/paper/G5GH7ARR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.16682&json=true","fetch_graph":"https://pith.science/api/pith-number/G5GH7ARRQUN6G6MYXVAQCVEA3U/graph.json","fetch_events":"https://pith.science/api/pith-number/G5GH7ARRQUN6G6MYXVAQCVEA3U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U/action/storage_attestation","attest_author":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U/action/author_attestation","sign_citation":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U/action/citation_signature","submit_replication":"https://pith.science/pith/G5GH7ARRQUN6G6MYXVAQCVEA3U/action/replication_record"}},"created_at":"2026-06-19T16:12:56.486718+00:00","updated_at":"2026-06-19T16:12:56.486718+00:00"}