{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OVXT5AM53M4ARVHZ2NVN3WAI3P","short_pith_number":"pith:OVXT5AM5","schema_version":"1.0","canonical_sha256":"756f3e819ddb3808d4f9d36addd808dbccc3998fa696ff9dfd823f8d99091713","source":{"kind":"arxiv","id":"2607.00685","version":1},"attestation_state":"computed","paper":{"title":"M2Note: Continual Evolution of Vision Language Models via Mistake Notebook Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.MA","authors_text":"Haiwen Li, Jing Tang, Lei Sun, Rui Chen, Xiangxiang Chu","submitted_at":"2026-07-01T09:30:10Z","abstract_excerpt":"Vision Language Models (VLMs) have demonstrated remarkable capabilities in multimodal reasoning tasks, yet they still suffer from recurring failures, such as skipping key visual checks, misapplying domain rules, and hallucinating unsupported concepts. Most existing solutions rely on supervised fine-tuning (SFT) and reinforcement learning (RL), which are expensive to iterate and can be brittle under distribution shift. To this end, we propose Multimodal Mistake Notebook Learning (M2Note), a training-free continual evolution framework that externalizes learning into an editable memory. M2Note tr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.00685","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MA","submitted_at":"2026-07-01T09:30:10Z","cross_cats_sorted":[],"title_canon_sha256":"9b7599c8ced5fb06b6b40399e868a873f6233ffc24fa45d3fe2a5fb7f24ce28c","abstract_canon_sha256":"510595dc34e6d60ca9714d37ba8bcbbfe769837d0874e49adc12af960703d70d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:17:51.551168Z","signature_b64":"ywsSTmfCsPlPDMCqlRAypb0ChGrF6O73khkvcfQBlpL88k9js6yMA1Kw7gVICnR/mlOBLRQpKIlCD/5TsS5gCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"756f3e819ddb3808d4f9d36addd808dbccc3998fa696ff9dfd823f8d99091713","last_reissued_at":"2026-07-02T01:17:51.550748Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:17:51.550748Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"M2Note: Continual Evolution of Vision Language Models via Mistake Notebook Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.MA","authors_text":"Haiwen Li, Jing Tang, Lei Sun, Rui Chen, Xiangxiang Chu","submitted_at":"2026-07-01T09:30:10Z","abstract_excerpt":"Vision Language Models (VLMs) have demonstrated remarkable capabilities in multimodal reasoning tasks, yet they still suffer from recurring failures, such as skipping key visual checks, misapplying domain rules, and hallucinating unsupported concepts. Most existing solutions rely on supervised fine-tuning (SFT) and reinforcement learning (RL), which are expensive to iterate and can be brittle under distribution shift. To this end, we propose Multimodal Mistake Notebook Learning (M2Note), a training-free continual evolution framework that externalizes learning into an editable memory. M2Note tr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.00685","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.00685/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.00685","created_at":"2026-07-02T01:17:51.550813+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.00685v1","created_at":"2026-07-02T01:17:51.550813+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.00685","created_at":"2026-07-02T01:17:51.550813+00:00"},{"alias_kind":"pith_short_12","alias_value":"OVXT5AM53M4A","created_at":"2026-07-02T01:17:51.550813+00:00"},{"alias_kind":"pith_short_16","alias_value":"OVXT5AM53M4ARVHZ","created_at":"2026-07-02T01:17:51.550813+00:00"},{"alias_kind":"pith_short_8","alias_value":"OVXT5AM5","created_at":"2026-07-02T01:17:51.550813+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P","json":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P.json","graph_json":"https://pith.science/api/pith-number/OVXT5AM53M4ARVHZ2NVN3WAI3P/graph.json","events_json":"https://pith.science/api/pith-number/OVXT5AM53M4ARVHZ2NVN3WAI3P/events.json","paper":"https://pith.science/paper/OVXT5AM5"},"agent_actions":{"view_html":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P","download_json":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P.json","view_paper":"https://pith.science/paper/OVXT5AM5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.00685&json=true","fetch_graph":"https://pith.science/api/pith-number/OVXT5AM53M4ARVHZ2NVN3WAI3P/graph.json","fetch_events":"https://pith.science/api/pith-number/OVXT5AM53M4ARVHZ2NVN3WAI3P/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P/action/storage_attestation","attest_author":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P/action/author_attestation","sign_citation":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P/action/citation_signature","submit_replication":"https://pith.science/pith/OVXT5AM53M4ARVHZ2NVN3WAI3P/action/replication_record"}},"created_at":"2026-07-02T01:17:51.550813+00:00","updated_at":"2026-07-02T01:17:51.550813+00:00"}