{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:U57Y3URSL4AEN3K5MUQNIS7LIU","short_pith_number":"pith:U57Y3URS","schema_version":"1.0","canonical_sha256":"a77f8dd2325f0046ed5d6520d44beb453c23cf40cdd34256561e3c45b423cef4","source":{"kind":"arxiv","id":"2605.18324","version":1},"attestation_state":"computed","paper":{"title":"Improved Baselines with Representation Autoencoders","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.GR","cs.LG","stat.ML"],"primary_cat":"cs.CV","authors_text":"Boyang Zheng, Eli Shechtman, Jaskirat Singh, Richard Zhang, Saining Xie, Zongze Wu","submitted_at":"2026-05-18T12:42:34Z","abstract_excerpt":"Representation Autoencoders (RAE) replace traditional VAE with pretrained vision encoders. In this paper, we systematically investigate several design choices and find three insights which simplify and improve RAE. First, we study a generalized formulation where the representation is defined as sum of the last k encoder layers rather than solely the final layer. This simple change greatly improves reconstruction without encoder finetuning or specialized data (e.g., text, faces). Second, we study the prevalent assumption that RAE (using pretrained representation as encoder) replaces representat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.18324","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T12:42:34Z","cross_cats_sorted":["cs.AI","cs.GR","cs.LG","stat.ML"],"title_canon_sha256":"45bca036305d4fd671afd8495b5012b382f4f6d05e81cf7bfbc345464443e166","abstract_canon_sha256":"71e6ac7181b2836ff654a9580c426b0764683635a35cf1e81276df2ac6ea776f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:05:55.252057Z","signature_b64":"sx8eRzzIPDv7UgD+ueBR3oZbz7TfESQS+gnbRCJziW3LYqRS0VZXRc0QsysdaaYv4NIi3ELYePP8lSWhTG1ODA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a77f8dd2325f0046ed5d6520d44beb453c23cf40cdd34256561e3c45b423cef4","last_reissued_at":"2026-05-20T00:05:55.251245Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:05:55.251245Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Improved Baselines with Representation Autoencoders","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.GR","cs.LG","stat.ML"],"primary_cat":"cs.CV","authors_text":"Boyang Zheng, Eli Shechtman, Jaskirat Singh, Richard Zhang, Saining Xie, Zongze Wu","submitted_at":"2026-05-18T12:42:34Z","abstract_excerpt":"Representation Autoencoders (RAE) replace traditional VAE with pretrained vision encoders. In this paper, we systematically investigate several design choices and find three insights which simplify and improve RAE. First, we study a generalized formulation where the representation is defined as sum of the last k encoder layers rather than solely the final layer. This simple change greatly improves reconstruction without encoder finetuning or specialized data (e.g., text, faces). Second, we study the prevalent assumption that RAE (using pretrained representation as encoder) replaces representat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18324","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18324/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.184168Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T23:21:58.857460Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"4d4d37b56f5dd45e8796933b9021a540f73f9f433f6bd229aaf7dc3928440607"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.18324","created_at":"2026-05-20T00:05:55.251401+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.18324v1","created_at":"2026-05-20T00:05:55.251401+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18324","created_at":"2026-05-20T00:05:55.251401+00:00"},{"alias_kind":"pith_short_12","alias_value":"U57Y3URSL4AE","created_at":"2026-05-20T00:05:55.251401+00:00"},{"alias_kind":"pith_short_16","alias_value":"U57Y3URSL4AEN3K5","created_at":"2026-05-20T00:05:55.251401+00:00"},{"alias_kind":"pith_short_8","alias_value":"U57Y3URS","created_at":"2026-05-20T00:05:55.251401+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU","json":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU.json","graph_json":"https://pith.science/api/pith-number/U57Y3URSL4AEN3K5MUQNIS7LIU/graph.json","events_json":"https://pith.science/api/pith-number/U57Y3URSL4AEN3K5MUQNIS7LIU/events.json","paper":"https://pith.science/paper/U57Y3URS"},"agent_actions":{"view_html":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU","download_json":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU.json","view_paper":"https://pith.science/paper/U57Y3URS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.18324&json=true","fetch_graph":"https://pith.science/api/pith-number/U57Y3URSL4AEN3K5MUQNIS7LIU/graph.json","fetch_events":"https://pith.science/api/pith-number/U57Y3URSL4AEN3K5MUQNIS7LIU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU/action/storage_attestation","attest_author":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU/action/author_attestation","sign_citation":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU/action/citation_signature","submit_replication":"https://pith.science/pith/U57Y3URSL4AEN3K5MUQNIS7LIU/action/replication_record"}},"created_at":"2026-05-20T00:05:55.251401+00:00","updated_at":"2026-05-20T00:05:55.251401+00:00"}