{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YAGX4P3DUXM6PI4AUQPZVK3RJN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d7ad830213bbfaa4f34c7ae1d28792b25fe8353fbc53f2e56c106155751bbe6b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-16T12:01:04Z","title_canon_sha256":"beb3178ecbdb85edbc2ca2e09f733f4131143ee911495293b4d2ced987e8325a"},"schema_version":"1.0","source":{"id":"2605.16949","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.16949","created_at":"2026-05-20T00:03:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.16949v1","created_at":"2026-05-20T00:03:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16949","created_at":"2026-05-20T00:03:32Z"},{"alias_kind":"pith_short_12","alias_value":"YAGX4P3DUXM6","created_at":"2026-05-20T00:03:32Z"},{"alias_kind":"pith_short_16","alias_value":"YAGX4P3DUXM6PI4A","created_at":"2026-05-20T00:03:32Z"},{"alias_kind":"pith_short_8","alias_value":"YAGX4P3D","created_at":"2026-05-20T00:03:32Z"}],"graph_snapshots":[{"event_id":"sha256:84efc869eadf56668403c73cdca212aa580cf5524acb59ecf9357206836c4523","target":"graph","created_at":"2026-05-20T00:03:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"By encouraging the model to internalize holistic spatial layouts and structural correlations from pre-trained features, sREPA achieves faster and more stable convergence, along with improved sample quality, compared to state-of-the-art alignment strategies."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That point-wise matching objectives are insufficient to capture the rich spatial topology of visual representations and that an explicit structural constraint on relational geometry will transfer this topology more effectively."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"sREPA enforces structural consistency in relational geometry of pre-trained vision features to accelerate DiT training and improve generation quality."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Structural alignment of relational geometry in features accelerates Diffusion Transformer training and improves sample quality."}],"snapshot_sha256":"6074a1f87421b7b67f63e069fbd51d0673c48571aa5b0fd1ecc7bac93b4ca406"},"formal_canon":{"evidence_count":3,"snapshot_sha256":"ecd89ca87dc7c3c24c19364c825f47394b85ae2ff8add1b264d01f7c07c9f998"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T21:01:19.108306Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T20:50:51.242078Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"cited_work_retraction","ran_at":"2026-05-19T19:52:11.328303Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T18:41:56.240017Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.323329Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.16949/integrity.json","findings":[],"snapshot_sha256":"a1eeb7243df3fafeb44e77a775d4b0e46d7f11697b183df3d07b44f29887b792","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent advances in Diffusion Transformers (DiTs) demonstrate that aligning noisy latent states with well-trained semantic features-as pioneered by Representation Alignment (REPA)-can substantially accelerate training and improve generation fidelity. Subsequent analysis(e.g., iREPA) suggests that these gains arise primarily from transferring spatial structure contained in pre-trained vision representations. However, mostly existing alignment methods employ point-wise matching objectives or rely on implicit architectural tweaks, which fail to explicitly model the spatial relational geometry inhe","authors_text":"Houqiang Li, Litong Gong, Shaodong Xu, Tiezheng Ge, Wengang Zhou, Zexian Li, Zhendong Wang","cross_cats":[],"headline":"Structural alignment of relational geometry in features accelerates Diffusion Transformer training and improves sample quality.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-16T12:01:04Z","title":"Beyond Point-Wise Matching: Structural Representation Alignment for Accelerating Diffusion Transformers"},"references":{"count":46,"internal_anchors":14,"resolved_work":46,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Self-supervised learning from images with a joint-embedding predictive architecture","work_id":"4336ef84-2bc6-4a25-a713-cefd1f8eea15","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Video generation models as world simulators.OpenAI Blog, 1(8):1, 2024","work_id":"9da2c350-7ff5-4ba2-98a7-c641ea0ab2bd","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"An empirical study of training self-supervised vision transformers","work_id":"2574a999-f6c9-45a0-9375-dc9a96e1fa2e","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Imagenet: A large-scale hierarchical image database","work_id":"cc291e4b-478b-4e79-ab37-d782c8e1888e","year":2009},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Diffusion models beat gans on image synthesis.Advances in neural information processing systems, 34:8780–8794","work_id":"3bd99a68-1f13-405d-b9a9-8934c9454ce4","year":2021}],"snapshot_sha256":"077b547bbda3f5dcbf192e199394f4e88b67c879311b46405cc91609d52d5937"},"source":{"id":"2605.16949","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-19T20:46:10.172474Z","id":"3750419e-ba72-4662-80d1-0c7618d1de4b","model_set":{"reader":"grok-4.3"},"one_line_summary":"sREPA enforces structural consistency in relational geometry of pre-trained vision features to accelerate DiT training and improve generation quality.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Structural alignment of relational geometry in features accelerates Diffusion Transformer training and improves sample quality.","strongest_claim":"By encouraging the model to internalize holistic spatial layouts and structural correlations from pre-trained features, sREPA achieves faster and more stable convergence, along with improved sample quality, compared to state-of-the-art alignment strategies.","weakest_assumption":"That point-wise matching objectives are insufficient to capture the rich spatial topology of visual representations and that an explicit structural constraint on relational geometry will transfer this topology more effectively."}},"verdict_id":"3750419e-ba72-4662-80d1-0c7618d1de4b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8607dc96a6c575b23c53c2c28696ca09e55d54f9b247225f18e34dd270d9c62f","target":"record","created_at":"2026-05-20T00:03:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d7ad830213bbfaa4f34c7ae1d28792b25fe8353fbc53f2e56c106155751bbe6b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-16T12:01:04Z","title_canon_sha256":"beb3178ecbdb85edbc2ca2e09f733f4131143ee911495293b4d2ced987e8325a"},"schema_version":"1.0","source":{"id":"2605.16949","kind":"arxiv","version":1}},"canonical_sha256":"c00d7e3f63a5d9e7a380a41f9aab714b5a24fe0e07e8d323ffe54ad3284b5067","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c00d7e3f63a5d9e7a380a41f9aab714b5a24fe0e07e8d323ffe54ad3284b5067","first_computed_at":"2026-05-20T00:03:32.459882Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:32.459882Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Wxzff1Nx2/cxcPvpI2qPL1i27slGs3kz4QqAfBt8pG49MAsmj/QByAZcBlnYyabAygkRsPiLRA/NEXPaapwqAA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:32.460545Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.16949","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8607dc96a6c575b23c53c2c28696ca09e55d54f9b247225f18e34dd270d9c62f","sha256:84efc869eadf56668403c73cdca212aa580cf5524acb59ecf9357206836c4523"],"state_sha256":"412f2a1637af8811fc62eb43f03b599727bf75b89ae0e1d6322298454d4715c2"}