{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:4WWB5O76PLU35SPIUBMQNBK4PF","short_pith_number":"pith:4WWB5O76","schema_version":"1.0","canonical_sha256":"e5ac1ebbfe7ae9bec9e8a05906855c795ec2c9c9bf38e1f9f8a32b206b250d26","source":{"kind":"arxiv","id":"2410.04941","version":7},"attestation_state":"computed","paper":{"title":"TOAST: Transformer Optimization using Adaptive and Simple Transformations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Bastian Rieck, Emanuele Palumbo, Emanuele Rodol\\`a, Irene Cannistraci, Julia E. Vogt, Simone Antonelli, Thomas M. Sutter","submitted_at":"2024-10-07T11:35:24Z","abstract_excerpt":"Foundation models achieve state-of-the-art performance across different tasks, but their size and computational demands raise concerns about accessibility and sustainability. Existing efficiency methods often require additional retraining or finetuning, limiting their practicality. Recent findings suggest that deep neural networks exhibit internal representation similarities. While such similarities across different models have been exploited for enabling techniques such as model stitching and merging, intra-network redundancy remains underexplored as a source for efficiency gains. In this pap"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2410.04941","kind":"arxiv","version":7},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2024-10-07T11:35:24Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ee896471eef80d67acd300f9faed0f717db13cb54001166aca1d2efce98e6690","abstract_canon_sha256":"458259f753e6bd5e720e8a58b3a3f87015142870af5baa7bf14a5673002874b6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:05.333609Z","signature_b64":"ytle38h2bTdYEwoTXuzgpTeTlnjh2dB5csNGgnAuf9s3Iig5UohCsoYwjUfv0yf88bOZp+KGg6xsCaGoJFJpDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e5ac1ebbfe7ae9bec9e8a05906855c795ec2c9c9bf38e1f9f8a32b206b250d26","last_reissued_at":"2026-05-20T00:04:05.332794Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:05.332794Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TOAST: Transformer Optimization using Adaptive and Simple Transformations","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Bastian Rieck, Emanuele Palumbo, Emanuele Rodol\\`a, Irene Cannistraci, Julia E. Vogt, Simone Antonelli, Thomas M. Sutter","submitted_at":"2024-10-07T11:35:24Z","abstract_excerpt":"Foundation models achieve state-of-the-art performance across different tasks, but their size and computational demands raise concerns about accessibility and sustainability. Existing efficiency methods often require additional retraining or finetuning, limiting their practicality. Recent findings suggest that deep neural networks exhibit internal representation similarities. While such similarities across different models have been exploited for enabling techniques such as model stitching and merging, intra-network redundancy remains underexplored as a source for efficiency gains. In this pap"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.04941","kind":"arxiv","version":7},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2410.04941/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2410.04941","created_at":"2026-05-20T00:04:05.332929+00:00"},{"alias_kind":"arxiv_version","alias_value":"2410.04941v7","created_at":"2026-05-20T00:04:05.332929+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.04941","created_at":"2026-05-20T00:04:05.332929+00:00"},{"alias_kind":"pith_short_12","alias_value":"4WWB5O76PLU3","created_at":"2026-05-20T00:04:05.332929+00:00"},{"alias_kind":"pith_short_16","alias_value":"4WWB5O76PLU35SPI","created_at":"2026-05-20T00:04:05.332929+00:00"},{"alias_kind":"pith_short_8","alias_value":"4WWB5O76","created_at":"2026-05-20T00:04:05.332929+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF","json":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF.json","graph_json":"https://pith.science/api/pith-number/4WWB5O76PLU35SPIUBMQNBK4PF/graph.json","events_json":"https://pith.science/api/pith-number/4WWB5O76PLU35SPIUBMQNBK4PF/events.json","paper":"https://pith.science/paper/4WWB5O76"},"agent_actions":{"view_html":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF","download_json":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF.json","view_paper":"https://pith.science/paper/4WWB5O76","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2410.04941&json=true","fetch_graph":"https://pith.science/api/pith-number/4WWB5O76PLU35SPIUBMQNBK4PF/graph.json","fetch_events":"https://pith.science/api/pith-number/4WWB5O76PLU35SPIUBMQNBK4PF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF/action/storage_attestation","attest_author":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF/action/author_attestation","sign_citation":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF/action/citation_signature","submit_replication":"https://pith.science/pith/4WWB5O76PLU35SPIUBMQNBK4PF/action/replication_record"}},"created_at":"2026-05-20T00:04:05.332929+00:00","updated_at":"2026-05-20T00:04:05.332929+00:00"}