{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:23WN7ZEAN3CPAY54GIU4PH2JYA","short_pith_number":"pith:23WN7ZEA","schema_version":"1.0","canonical_sha256":"d6ecdfe4806ec4f063bc3229c79f49c03946708bfa977e2d95f74bac60e118c2","source":{"kind":"arxiv","id":"2512.00349","version":2},"attestation_state":"computed","paper":{"title":"Debate with Images: Detecting Deceptive Behaviors in Multimodal Large Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Boyuan Chen, Donghai Hong, Jiaming Ji, Jiayi Zhou, Josef Dai, Kaile Wang, Shiyi Hou, Sitong Fang, Yaodong Yang","submitted_at":"2025-11-29T06:39:36Z","abstract_excerpt":"Are frontier AI systems becoming more capable? Certainly. Yet such progress is not an unalloyed blessing but rather a Trojan horse: behind their performance leaps lie more insidious and destructive safety risks, namely deception. Unlike hallucination, which arises from insufficient capability and leads to mistakes, deception represents a deeper threat in which models deliberately mislead users through complex reasoning and insincere responses. As system capabilities advance, deceptive behaviours have spread from textual to multimodal settings, amplifying their potential harm. First and foremos"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.00349","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-11-29T06:39:36Z","cross_cats_sorted":[],"title_canon_sha256":"381593be122216161c9afb0fe16494349f90742c425b995d574f74c5ce1d1390","abstract_canon_sha256":"42b0ae92bd19a6b9375d5c390219c0d04bace6805133dbe85ba60b76c6548a64"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:33.622647Z","signature_b64":"AHSfbeazmw5rNb4sZ3sU0K9r/QhIfaVnMLmOGkUieBNi6YOBJ/Eib6au5/PR0+ZkTrlSPHUOkB9K79sZ99CDBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d6ecdfe4806ec4f063bc3229c79f49c03946708bfa977e2d95f74bac60e118c2","last_reissued_at":"2026-05-28T01:04:33.622050Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:33.622050Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Debate with Images: Detecting Deceptive Behaviors in Multimodal Large Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Boyuan Chen, Donghai Hong, Jiaming Ji, Jiayi Zhou, Josef Dai, Kaile Wang, Shiyi Hou, Sitong Fang, Yaodong Yang","submitted_at":"2025-11-29T06:39:36Z","abstract_excerpt":"Are frontier AI systems becoming more capable? Certainly. Yet such progress is not an unalloyed blessing but rather a Trojan horse: behind their performance leaps lie more insidious and destructive safety risks, namely deception. Unlike hallucination, which arises from insufficient capability and leads to mistakes, deception represents a deeper threat in which models deliberately mislead users through complex reasoning and insincere responses. As system capabilities advance, deceptive behaviours have spread from textual to multimodal settings, amplifying their potential harm. First and foremos"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.00349","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.00349/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.00349","created_at":"2026-05-28T01:04:33.622111+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.00349v2","created_at":"2026-05-28T01:04:33.622111+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.00349","created_at":"2026-05-28T01:04:33.622111+00:00"},{"alias_kind":"pith_short_12","alias_value":"23WN7ZEAN3CP","created_at":"2026-05-28T01:04:33.622111+00:00"},{"alias_kind":"pith_short_16","alias_value":"23WN7ZEAN3CPAY54","created_at":"2026-05-28T01:04:33.622111+00:00"},{"alias_kind":"pith_short_8","alias_value":"23WN7ZEA","created_at":"2026-05-28T01:04:33.622111+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA","json":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA.json","graph_json":"https://pith.science/api/pith-number/23WN7ZEAN3CPAY54GIU4PH2JYA/graph.json","events_json":"https://pith.science/api/pith-number/23WN7ZEAN3CPAY54GIU4PH2JYA/events.json","paper":"https://pith.science/paper/23WN7ZEA"},"agent_actions":{"view_html":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA","download_json":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA.json","view_paper":"https://pith.science/paper/23WN7ZEA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.00349&json=true","fetch_graph":"https://pith.science/api/pith-number/23WN7ZEAN3CPAY54GIU4PH2JYA/graph.json","fetch_events":"https://pith.science/api/pith-number/23WN7ZEAN3CPAY54GIU4PH2JYA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA/action/storage_attestation","attest_author":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA/action/author_attestation","sign_citation":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA/action/citation_signature","submit_replication":"https://pith.science/pith/23WN7ZEAN3CPAY54GIU4PH2JYA/action/replication_record"}},"created_at":"2026-05-28T01:04:33.622111+00:00","updated_at":"2026-05-28T01:04:33.622111+00:00"}