{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:EN7YVYKC2ND7754X4YS33SN2WE","short_pith_number":"pith:EN7YVYKC","schema_version":"1.0","canonical_sha256":"237f8ae142d347fff797e625bdc9bab1247b40eef0ec231f87e3081bf284f65a","source":{"kind":"arxiv","id":"2605.27235","version":1},"attestation_state":"computed","paper":{"title":"MRT: Masked Region Transformer for Layered Image Generation and Editing at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ethan Smith, Jingye Chen, Mohan Zhou, Yalong Bai, Yifan Pu, Yuchi Liu, Yuhui Yuan, Zhao Zhang, Zhicong Tang","submitted_at":"2026-05-26T16:16:19Z","abstract_excerpt":"Layered image generation and editing is a fundamental capability that enables layer-wise reuse, editing, and composition of generated visual content, analogous to word-level editing in natural language. Despite its importance, this remains an underexplored area at scale. To address this gap, we present MRT, a 20B-parameter masked region diffusion model tailored for multi-layer transparent image generation and editing, trained on over 10M multilingual design samples spanning diverse aspect ratios and textual prompts. To fully leverage this scale, we make two key technical contributions. First, "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.27235","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-26T16:16:19Z","cross_cats_sorted":[],"title_canon_sha256":"53260f3d34407aa591e77611e42e1d4b5e9f074f82dac85b9caaa3cd1478fc73","abstract_canon_sha256":"c8b160151592a8d782e96fed0dec732d1eff03fc68d808d7f1684de314dfc486"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T02:06:07.026069Z","signature_b64":"/MS+v+nXc6syH0xgXdMGw3fPYmpSjWSJauxmlCW62VdTZd4yiAY6d4I+fudEIXgVPW59Zr0c36XHuM6futLzAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"237f8ae142d347fff797e625bdc9bab1247b40eef0ec231f87e3081bf284f65a","last_reissued_at":"2026-05-27T02:06:07.025375Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T02:06:07.025375Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MRT: Masked Region Transformer for Layered Image Generation and Editing at Scale","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ethan Smith, Jingye Chen, Mohan Zhou, Yalong Bai, Yifan Pu, Yuchi Liu, Yuhui Yuan, Zhao Zhang, Zhicong Tang","submitted_at":"2026-05-26T16:16:19Z","abstract_excerpt":"Layered image generation and editing is a fundamental capability that enables layer-wise reuse, editing, and composition of generated visual content, analogous to word-level editing in natural language. Despite its importance, this remains an underexplored area at scale. To address this gap, we present MRT, a 20B-parameter masked region diffusion model tailored for multi-layer transparent image generation and editing, trained on over 10M multilingual design samples spanning diverse aspect ratios and textual prompts. To fully leverage this scale, we make two key technical contributions. First, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27235","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27235/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.27235","created_at":"2026-05-27T02:06:07.025484+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.27235v1","created_at":"2026-05-27T02:06:07.025484+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27235","created_at":"2026-05-27T02:06:07.025484+00:00"},{"alias_kind":"pith_short_12","alias_value":"EN7YVYKC2ND7","created_at":"2026-05-27T02:06:07.025484+00:00"},{"alias_kind":"pith_short_16","alias_value":"EN7YVYKC2ND7754X","created_at":"2026-05-27T02:06:07.025484+00:00"},{"alias_kind":"pith_short_8","alias_value":"EN7YVYKC","created_at":"2026-05-27T02:06:07.025484+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE","json":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE.json","graph_json":"https://pith.science/api/pith-number/EN7YVYKC2ND7754X4YS33SN2WE/graph.json","events_json":"https://pith.science/api/pith-number/EN7YVYKC2ND7754X4YS33SN2WE/events.json","paper":"https://pith.science/paper/EN7YVYKC"},"agent_actions":{"view_html":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE","download_json":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE.json","view_paper":"https://pith.science/paper/EN7YVYKC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.27235&json=true","fetch_graph":"https://pith.science/api/pith-number/EN7YVYKC2ND7754X4YS33SN2WE/graph.json","fetch_events":"https://pith.science/api/pith-number/EN7YVYKC2ND7754X4YS33SN2WE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE/action/storage_attestation","attest_author":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE/action/author_attestation","sign_citation":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE/action/citation_signature","submit_replication":"https://pith.science/pith/EN7YVYKC2ND7754X4YS33SN2WE/action/replication_record"}},"created_at":"2026-05-27T02:06:07.025484+00:00","updated_at":"2026-05-27T02:06:07.025484+00:00"}