{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4XV7OROVHOAUW6G6EYRBD5ZCZR","short_pith_number":"pith:4XV7OROV","canonical_record":{"source":{"id":"2604.10784","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:19:04Z","cross_cats_sorted":[],"title_canon_sha256":"c0ce6c133d09ad4dbe174993eb6ca7e7f6a98d0f779c8fc241f4c0d2327b5d6d","abstract_canon_sha256":"3f68419f7443999b29d7ce95845e270f1ad54cc19828d5e3d537a8c0c34ace63"},"schema_version":"1.0"},"canonical_sha256":"e5ebf745d53b814b78de262211f722cc66f4da3c21f11841fd1ed28636c8c08c","source":{"kind":"arxiv","id":"2604.10784","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.10784","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"arxiv_version","alias_value":"2604.10784v2","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.10784","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_12","alias_value":"4XV7OROVHOAU","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_16","alias_value":"4XV7OROVHOAUW6G6","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_8","alias_value":"4XV7OROV","created_at":"2026-05-21T01:04:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4XV7OROVHOAUW6G6EYRBD5ZCZR","target":"record","payload":{"canonical_record":{"source":{"id":"2604.10784","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:19:04Z","cross_cats_sorted":[],"title_canon_sha256":"c0ce6c133d09ad4dbe174993eb6ca7e7f6a98d0f779c8fc241f4c0d2327b5d6d","abstract_canon_sha256":"3f68419f7443999b29d7ce95845e270f1ad54cc19828d5e3d537a8c0c34ace63"},"schema_version":"1.0"},"canonical_sha256":"e5ebf745d53b814b78de262211f722cc66f4da3c21f11841fd1ed28636c8c08c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:25.742434Z","signature_b64":"ONkxbzKSffjflF3TZsfMhMH0vl4scJeM/VGCIxEHEbWG1RkeO/v3x3Hqf8pE/htIMEk4QGntjmkEMEPGRO3xBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e5ebf745d53b814b78de262211f722cc66f4da3c21f11841fd1ed28636c8c08c","last_reissued_at":"2026-05-21T01:04:25.741665Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:25.741665Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.10784","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zfn8R8OQtwUxEU1K4uQ//xfvxtVNY2SWLEGVxS+8lfbDEuBWCJTAxqNGryk5ep4h5Kr/zm9qio0ynC2TxdBbAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T13:32:26.509197Z"},"content_sha256":"a1a818f7664c39485ac8c3b2291e5034a7938e426e35329b6996dda314219a6f","schema_version":"1.0","event_id":"sha256:a1a818f7664c39485ac8c3b2291e5034a7938e426e35329b6996dda314219a6f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4XV7OROVHOAUW6G6EYRBD5ZCZR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"TorchUMM: A Unified Multimodal Model Codebase for Evaluation, Analysis, and Post-training","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Hao Chen, Hayes Bai, Hongyu Zhu, Jindong Wang, Marios Savvides, Pan He, Sharon Li, Wenwen Wang, Yinyi Luo","submitted_at":"2026-04-12T19:19:04Z","abstract_excerpt":"Recent advances in unified multimodal models (UMMs) have led to a proliferation of architectures capable of understanding, generating, and editing across visual and textual modalities. However, developing a unified framework for UMMs remains challenging due to the diversity of model architectures and the heterogeneity of training paradigms and implementation details. In this paper, we present TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets. TorchUMM supports a broad spectrum of models covering a wi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the chosen models, tasks, and datasets are representative enough to produce fair comparisons and that the unified interface does not introduce implementation-specific biases that distort model capabilities.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"TorchUMM is the first unified codebase and benchmark suite for standardized evaluation of diverse unified multimodal models on understanding, generation, and editing tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"c4d77dda56a6e03cb1b1ce8557a8e7c5a5269a4260fb49fd069e6969d4c29518"},"source":{"id":"2604.10784","kind":"arxiv","version":2},"verdict":{"id":"0a58fe64-fb66-4d5a-b175-a71403a04d3e","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T15:28:11.867015Z","strongest_claim":"TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets.","one_line_summary":"TorchUMM is the first unified codebase and benchmark suite for standardized evaluation of diverse unified multimodal models on understanding, generation, and editing tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the chosen models, tasks, and datasets are representative enough to produce fair comparisons and that the unified interface does not introduce implementation-specific biases that distort model capabilities.","pith_extraction_headline":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.10784/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"0a58fe64-fb66-4d5a-b175-a71403a04d3e"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2rSwmF696sOaiALlNL9RNTLHA6ZfiXa+yzky++HANvvedeozZzWKMYPz3AQ8PWP1Fqp5DJsWq6NPmpBk5xy/DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T13:32:26.509719Z"},"content_sha256":"9651aa92a4170b5576fd7f5fc617dad0e3af0d8860892ffe96ee51694c2c79ac","schema_version":"1.0","event_id":"sha256:9651aa92a4170b5576fd7f5fc617dad0e3af0d8860892ffe96ee51694c2c79ac"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/bundle.json","state_url":"https://pith.science/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T13:32:26Z","links":{"resolver":"https://pith.science/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR","bundle":"https://pith.science/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/bundle.json","state":"https://pith.science/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4XV7OROVHOAUW6G6EYRBD5ZCZR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4XV7OROVHOAUW6G6EYRBD5ZCZR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3f68419f7443999b29d7ce95845e270f1ad54cc19828d5e3d537a8c0c34ace63","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:19:04Z","title_canon_sha256":"c0ce6c133d09ad4dbe174993eb6ca7e7f6a98d0f779c8fc241f4c0d2327b5d6d"},"schema_version":"1.0","source":{"id":"2604.10784","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.10784","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"arxiv_version","alias_value":"2604.10784v2","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.10784","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_12","alias_value":"4XV7OROVHOAU","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_16","alias_value":"4XV7OROVHOAUW6G6","created_at":"2026-05-21T01:04:25Z"},{"alias_kind":"pith_short_8","alias_value":"4XV7OROV","created_at":"2026-05-21T01:04:25Z"}],"graph_snapshots":[{"event_id":"sha256:9651aa92a4170b5576fd7f5fc617dad0e3af0d8860892ffe96ee51694c2c79ac","target":"graph","created_at":"2026-05-21T01:04:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the chosen models, tasks, and datasets are representative enough to produce fair comparisons and that the unified interface does not introduce implementation-specific biases that distort model capabilities."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"TorchUMM is the first unified codebase and benchmark suite for standardized evaluation of diverse unified multimodal models on understanding, generation, and editing tasks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models."}],"snapshot_sha256":"c4d77dda56a6e03cb1b1ce8557a8e7c5a5269a4260fb49fd069e6969d4c29518"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.10784/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advances in unified multimodal models (UMMs) have led to a proliferation of architectures capable of understanding, generating, and editing across visual and textual modalities. However, developing a unified framework for UMMs remains challenging due to the diversity of model architectures and the heterogeneity of training paradigms and implementation details. In this paper, we present TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets. TorchUMM supports a broad spectrum of models covering a wi","authors_text":"Hao Chen, Hayes Bai, Hongyu Zhu, Jindong Wang, Marios Savvides, Pan He, Sharon Li, Wenwen Wang, Yinyi Luo","cross_cats":[],"headline":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:19:04Z","title":"TorchUMM: A Unified Multimodal Model Codebase for Evaluation, Analysis, and Post-training"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.10784","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T15:28:11.867015Z","id":"0a58fe64-fb66-4d5a-b175-a71403a04d3e","model_set":{"reader":"grok-4.3"},"one_line_summary":"TorchUMM is the first unified codebase and benchmark suite for standardized evaluation of diverse unified multimodal models on understanding, generation, and editing tasks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"TorchUMM supplies the first unified codebase for evaluating, analyzing, and post-training diverse unified multimodal models.","strongest_claim":"TorchUMM, the first unified codebase for comprehensive evaluation, analysis, and post-training across diverse UMM backbones, tasks, and datasets.","weakest_assumption":"That the chosen models, tasks, and datasets are representative enough to produce fair comparisons and that the unified interface does not introduce implementation-specific biases that distort model capabilities."}},"verdict_id":"0a58fe64-fb66-4d5a-b175-a71403a04d3e"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a1a818f7664c39485ac8c3b2291e5034a7938e426e35329b6996dda314219a6f","target":"record","created_at":"2026-05-21T01:04:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3f68419f7443999b29d7ce95845e270f1ad54cc19828d5e3d537a8c0c34ace63","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:19:04Z","title_canon_sha256":"c0ce6c133d09ad4dbe174993eb6ca7e7f6a98d0f779c8fc241f4c0d2327b5d6d"},"schema_version":"1.0","source":{"id":"2604.10784","kind":"arxiv","version":2}},"canonical_sha256":"e5ebf745d53b814b78de262211f722cc66f4da3c21f11841fd1ed28636c8c08c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e5ebf745d53b814b78de262211f722cc66f4da3c21f11841fd1ed28636c8c08c","first_computed_at":"2026-05-21T01:04:25.741665Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:25.741665Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ONkxbzKSffjflF3TZsfMhMH0vl4scJeM/VGCIxEHEbWG1RkeO/v3x3Hqf8pE/htIMEk4QGntjmkEMEPGRO3xBw==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:25.742434Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.10784","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a1a818f7664c39485ac8c3b2291e5034a7938e426e35329b6996dda314219a6f","sha256:9651aa92a4170b5576fd7f5fc617dad0e3af0d8860892ffe96ee51694c2c79ac"],"state_sha256":"cd820b22068e4d650c6aef0aed343bed56cacbfd7607933f9b08b8f97fa133df"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4nAMUwXYN8SsJteMV6ovQoLjj0TPJToWbCIICrCVkZ9uqeX6WrrzmKoZR0P2AT1eNfUCjeTzfqvpGomsyyIiCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T13:32:26.513689Z","bundle_sha256":"0c7c020154d875603bcb1231a030eb60ca0f6b3af82988b5e66db8c634895332"}}