{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:2W3F2J6XVHMDI3LFEUIZOILSTF","short_pith_number":"pith:2W3F2J6X","canonical_record":{"source":{"id":"2503.11367","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-03-14T13:07:45Z","cross_cats_sorted":[],"title_canon_sha256":"64e099845bc70419a74ce43ec581b28bf8fe068838399064767f2e97317a44f8","abstract_canon_sha256":"81229e4ca4a61b772dc127e36777a055e454efd7e80f05e5903c80ffad780486"},"schema_version":"1.0"},"canonical_sha256":"d5b65d27d7a9d8346d6525119721729966767114609edd5fb5c8930e9b14019d","source":{"kind":"arxiv","id":"2503.11367","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.11367","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"arxiv_version","alias_value":"2503.11367v4","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.11367","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_12","alias_value":"2W3F2J6XVHMD","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_16","alias_value":"2W3F2J6XVHMDI3LF","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_8","alias_value":"2W3F2J6X","created_at":"2026-05-26T02:03:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:2W3F2J6XVHMDI3LFEUIZOILSTF","target":"record","payload":{"canonical_record":{"source":{"id":"2503.11367","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-03-14T13:07:45Z","cross_cats_sorted":[],"title_canon_sha256":"64e099845bc70419a74ce43ec581b28bf8fe068838399064767f2e97317a44f8","abstract_canon_sha256":"81229e4ca4a61b772dc127e36777a055e454efd7e80f05e5903c80ffad780486"},"schema_version":"1.0"},"canonical_sha256":"d5b65d27d7a9d8346d6525119721729966767114609edd5fb5c8930e9b14019d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:03:49.545528Z","signature_b64":"WG+ULp/OOvh9RIQrIGURrXSL6ksDWF3m1aYxspg7QXnm+2sgFdFP19ZJNvGbwmRP3E2GN8cpdJKDV/K9zaGLBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d5b65d27d7a9d8346d6525119721729966767114609edd5fb5c8930e9b14019d","last_reissued_at":"2026-05-26T02:03:49.544533Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:03:49.544533Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2503.11367","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:03:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lUqrjyXnSu1TXkH4zGh4FuNhtNjF6sH1Tz1vxzEwD5ZD4tjC3+3H0NGjQbyHpXaISeQ3HhlHwubdUYJx3aeLCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:38:45.424166Z"},"content_sha256":"15b849e1b07010813188f052cfd7472e9b9af0b70d3a3837e694446cc4fe43ba","schema_version":"1.0","event_id":"sha256:15b849e1b07010813188f052cfd7472e9b9af0b70d3a3837e694446cc4fe43ba"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:2W3F2J6XVHMDI3LFEUIZOILSTF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Efficient Distributed MLLM Training with Cornstarch","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Ang Chen, Insu Jang, Mosharaf Chowdhury, Nikhil Bansal, Runyu Lu","submitted_at":"2025-03-14T13:07:45Z","abstract_excerpt":"Multimodal large language models (MLLMs) extend the capabilities of large language models (LLMs) by combining heterogeneous model architectures to handle diverse modalities like images and audio. However, this inherent heterogeneity in MLLM model structure and data types makes makeshift extensions to existing LLM training frameworks unsuitable for efficient MLLM training. While there are a few works that have attempted to address the heterogeneity in MLLM training, their approaches are limited to only superficially considering the characteristics of MLLMs.\n  In this paper, we present Cornstarc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2503.11367","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2503.11367/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:03:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nfwIto0OI+v6IMRcXwA/XLbCcW5kEjSTHyqBubknyDWxvQ0N3tjlJV0dxOZrGZVxEZjhLTFLZSrFrQATbKZBBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:38:45.424566Z"},"content_sha256":"c6a6b71f9e0211c15aa31acb0e8c03cfce2350f4a2363460baa72dfabebbb89f","schema_version":"1.0","event_id":"sha256:c6a6b71f9e0211c15aa31acb0e8c03cfce2350f4a2363460baa72dfabebbb89f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/bundle.json","state_url":"https://pith.science/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T05:38:45Z","links":{"resolver":"https://pith.science/pith/2W3F2J6XVHMDI3LFEUIZOILSTF","bundle":"https://pith.science/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/bundle.json","state":"https://pith.science/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2W3F2J6XVHMDI3LFEUIZOILSTF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:2W3F2J6XVHMDI3LFEUIZOILSTF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"81229e4ca4a61b772dc127e36777a055e454efd7e80f05e5903c80ffad780486","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-03-14T13:07:45Z","title_canon_sha256":"64e099845bc70419a74ce43ec581b28bf8fe068838399064767f2e97317a44f8"},"schema_version":"1.0","source":{"id":"2503.11367","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2503.11367","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"arxiv_version","alias_value":"2503.11367v4","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2503.11367","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_12","alias_value":"2W3F2J6XVHMD","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_16","alias_value":"2W3F2J6XVHMDI3LF","created_at":"2026-05-26T02:03:49Z"},{"alias_kind":"pith_short_8","alias_value":"2W3F2J6X","created_at":"2026-05-26T02:03:49Z"}],"graph_snapshots":[{"event_id":"sha256:c6a6b71f9e0211c15aa31acb0e8c03cfce2350f4a2363460baa72dfabebbb89f","target":"graph","created_at":"2026-05-26T02:03:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2503.11367/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multimodal large language models (MLLMs) extend the capabilities of large language models (LLMs) by combining heterogeneous model architectures to handle diverse modalities like images and audio. However, this inherent heterogeneity in MLLM model structure and data types makes makeshift extensions to existing LLM training frameworks unsuitable for efficient MLLM training. While there are a few works that have attempted to address the heterogeneity in MLLM training, their approaches are limited to only superficially considering the characteristics of MLLMs.\n  In this paper, we present Cornstarc","authors_text":"Ang Chen, Insu Jang, Mosharaf Chowdhury, Nikhil Bansal, Runyu Lu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-03-14T13:07:45Z","title":"Efficient Distributed MLLM Training with Cornstarch"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2503.11367","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:15b849e1b07010813188f052cfd7472e9b9af0b70d3a3837e694446cc4fe43ba","target":"record","created_at":"2026-05-26T02:03:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"81229e4ca4a61b772dc127e36777a055e454efd7e80f05e5903c80ffad780486","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-03-14T13:07:45Z","title_canon_sha256":"64e099845bc70419a74ce43ec581b28bf8fe068838399064767f2e97317a44f8"},"schema_version":"1.0","source":{"id":"2503.11367","kind":"arxiv","version":4}},"canonical_sha256":"d5b65d27d7a9d8346d6525119721729966767114609edd5fb5c8930e9b14019d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d5b65d27d7a9d8346d6525119721729966767114609edd5fb5c8930e9b14019d","first_computed_at":"2026-05-26T02:03:49.544533Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:03:49.544533Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WG+ULp/OOvh9RIQrIGURrXSL6ksDWF3m1aYxspg7QXnm+2sgFdFP19ZJNvGbwmRP3E2GN8cpdJKDV/K9zaGLBg==","signature_status":"signed_v1","signed_at":"2026-05-26T02:03:49.545528Z","signed_message":"canonical_sha256_bytes"},"source_id":"2503.11367","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:15b849e1b07010813188f052cfd7472e9b9af0b70d3a3837e694446cc4fe43ba","sha256:c6a6b71f9e0211c15aa31acb0e8c03cfce2350f4a2363460baa72dfabebbb89f"],"state_sha256":"9c894569ffb0e66a61540c671fa48b084d574d7c0a14a7d8c18ffa0737da8f40"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oT/Z0HpPXfJZISkXWc7gdK4CNoyclnvj4vz1lHmwz9kV4kiHVFsA9vPb1MT973DOXZP3rM23tSO/bhQ16OauBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T05:38:45.426689Z","bundle_sha256":"de24dc34cd3d2c0db08be3debdf5ababd15e15431bb707c466a641fa09a0030f"}}