{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:O5EJ276G42S3GXL6P4OYRAPMVC","short_pith_number":"pith:O5EJ276G","schema_version":"1.0","canonical_sha256":"77489d7fc6e6a5b35d7e7f1d8881eca8a5ba3331f457d91e1b57219c80023fb4","source":{"kind":"arxiv","id":"2605.20808","version":1},"attestation_state":"computed","paper":{"title":"Spatial Gram Alignment for Ultra-High-Resolution Image Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Di Huang, Jinjin Zhang, Xiefan Guo","submitted_at":"2026-05-20T06:59:12Z","abstract_excerpt":"Modern ultra-high-resolution image synthesis relies heavily on the robust generative capacity of large-scale pre-trained Latent Diffusion Models (LDMs). While recent representation alignment methods have proven effective by distilling visual priors from foundation models (e.g., SAM or DINO) into generative latent features, scaling these approaches to pre-trained LDMs at extreme resolutions exposes a critical learnability-fidelity conflict. Specifically, forcing direct patch-wise feature distillation inherently perturbs the pre-trained latent manifold, ultimately leading to generation degradati"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.20808","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T06:59:12Z","cross_cats_sorted":[],"title_canon_sha256":"89979c13263c13fc9630e72b5dcc72fab569b4086b4633faea7a56408083b7ce","abstract_canon_sha256":"58ffb7b6c3cde6b3f485c75d32dbbbfd7ac0ff3441db58b983e264331a097b18"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:55.601491Z","signature_b64":"U+Zd+1ejMw6GNYbs17f9C63KEoMonh5/fRcoQhOtsomTszK1sRXY6SgAOmLHBXr+FA9HylIeUEYZIMJX3o6uBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"77489d7fc6e6a5b35d7e7f1d8881eca8a5ba3331f457d91e1b57219c80023fb4","last_reissued_at":"2026-05-21T01:04:55.600595Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:55.600595Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Spatial Gram Alignment for Ultra-High-Resolution Image Synthesis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Di Huang, Jinjin Zhang, Xiefan Guo","submitted_at":"2026-05-20T06:59:12Z","abstract_excerpt":"Modern ultra-high-resolution image synthesis relies heavily on the robust generative capacity of large-scale pre-trained Latent Diffusion Models (LDMs). While recent representation alignment methods have proven effective by distilling visual priors from foundation models (e.g., SAM or DINO) into generative latent features, scaling these approaches to pre-trained LDMs at extreme resolutions exposes a critical learnability-fidelity conflict. Specifically, forcing direct patch-wise feature distillation inherently perturbs the pre-trained latent manifold, ultimately leading to generation degradati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20808","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20808/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.20808","created_at":"2026-05-21T01:04:55.600744+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.20808v1","created_at":"2026-05-21T01:04:55.600744+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20808","created_at":"2026-05-21T01:04:55.600744+00:00"},{"alias_kind":"pith_short_12","alias_value":"O5EJ276G42S3","created_at":"2026-05-21T01:04:55.600744+00:00"},{"alias_kind":"pith_short_16","alias_value":"O5EJ276G42S3GXL6","created_at":"2026-05-21T01:04:55.600744+00:00"},{"alias_kind":"pith_short_8","alias_value":"O5EJ276G","created_at":"2026-05-21T01:04:55.600744+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC","json":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC.json","graph_json":"https://pith.science/api/pith-number/O5EJ276G42S3GXL6P4OYRAPMVC/graph.json","events_json":"https://pith.science/api/pith-number/O5EJ276G42S3GXL6P4OYRAPMVC/events.json","paper":"https://pith.science/paper/O5EJ276G"},"agent_actions":{"view_html":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC","download_json":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC.json","view_paper":"https://pith.science/paper/O5EJ276G","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.20808&json=true","fetch_graph":"https://pith.science/api/pith-number/O5EJ276G42S3GXL6P4OYRAPMVC/graph.json","fetch_events":"https://pith.science/api/pith-number/O5EJ276G42S3GXL6P4OYRAPMVC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC/action/storage_attestation","attest_author":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC/action/author_attestation","sign_citation":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC/action/citation_signature","submit_replication":"https://pith.science/pith/O5EJ276G42S3GXL6P4OYRAPMVC/action/replication_record"}},"created_at":"2026-05-21T01:04:55.600744+00:00","updated_at":"2026-05-21T01:04:55.600744+00:00"}