{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:IJ3L5UAQ2NP5GQFZE7CMPQAAGB","short_pith_number":"pith:IJ3L5UAQ","schema_version":"1.0","canonical_sha256":"4276bed010d35fd340b927c4c7c0003053a3ef69893ab3379a7d7cb1090a88f2","source":{"kind":"arxiv","id":"2605.12714","version":1},"attestation_state":"computed","paper":{"title":"Layer-wise Representation Dynamics: An Empirical Investigation Across Embedders and Base LLMs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Layer-wise dynamics in language models reveal performance signals beyond final representations.","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Jingzhou Jiang, Kar Yan Tam, Yi Yang","submitted_at":"2026-05-12T20:22:45Z","abstract_excerpt":"Hidden states change substantially across the layers of modern language models, but most layer-wise analyses focus on one aspect of that change. We propose Layer-wise Representation Dynamics (LRD), a framework with three layer-wise measurement families: Frenet (Grassmann speed and curvature) for global subspace motion, Neighborhood Retention Score (NRS) for local nearest-neighbor retention, and Graph Filtration Mutual Information (GFMI) for alignment with the final layer. Applying LRD to 31 models (encoder-based and decoder-based embedders, plus base LLMs) on 30 MTEB tasks reveals architectura"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.12714","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:22:45Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"97bad1d980c31a6c0d7217e1c85058ab6b563cfd7aab6b18cbb3b4e82b24fa8c","abstract_canon_sha256":"2dbab8f8ccbd0cebd9692cdd441dd1ad8c9dec3c8a50c89e89dd8e3558a65434"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:49.500490Z","signature_b64":"/qOLmEEVCHGFtKsNnUV8V08Ly2Ikse25/od02P+ceMwhfaZgV4zr++Njcjt+/+Z7wZT0AGKf85ZQAhZzWJZACg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4276bed010d35fd340b927c4c7c0003053a3ef69893ab3379a7d7cb1090a88f2","last_reissued_at":"2026-05-18T03:09:49.499594Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:49.499594Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Layer-wise Representation Dynamics: An Empirical Investigation Across Embedders and Base LLMs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Layer-wise dynamics in language models reveal performance signals beyond final representations.","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Jingzhou Jiang, Kar Yan Tam, Yi Yang","submitted_at":"2026-05-12T20:22:45Z","abstract_excerpt":"Hidden states change substantially across the layers of modern language models, but most layer-wise analyses focus on one aspect of that change. We propose Layer-wise Representation Dynamics (LRD), a framework with three layer-wise measurement families: Frenet (Grassmann speed and curvature) for global subspace motion, Neighborhood Retention Score (NRS) for local nearest-neighbor retention, and Graph Filtration Mutual Information (GFMI) for alignment with the final layer. Applying LRD to 31 models (encoder-based and decoder-based embedders, plus base LLMs) on 30 MTEB tasks reveals architectura"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Applying LRD to 31 models on 30 MTEB tasks reveals architectural and task-level differences that are not apparent from final-layer representations alone... These results show that layer-wise structure provides signal for both interpretation and deployment decisions.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the three proposed measurements (Frenet, NRS, GFMI) capture dynamics that are causally relevant to downstream performance rather than merely correlated on the tested set of models and tasks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"LRD framework with Frenet, NRS, and GFMI metrics shows layer-wise structure in 31 models provides usable signal for model selection and pruning on MTEB tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Layer-wise dynamics in language models reveal performance signals beyond final representations.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"b6e1b779dc2f0da069b8c09b4670d909665c5b3f0d9bade66ad1ddc8f6add54a"},"source":{"id":"2605.12714","kind":"arxiv","version":1},"verdict":{"id":"d454eae7-13eb-4855-b129-bf9228cf00af","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T21:45:21.962818Z","strongest_claim":"Applying LRD to 31 models on 30 MTEB tasks reveals architectural and task-level differences that are not apparent from final-layer representations alone... These results show that layer-wise structure provides signal for both interpretation and deployment decisions.","one_line_summary":"LRD framework with Frenet, NRS, and GFMI metrics shows layer-wise structure in 31 models provides usable signal for model selection and pruning on MTEB tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the three proposed measurements (Frenet, NRS, GFMI) capture dynamics that are causally relevant to downstream performance rather than merely correlated on the tested set of models and tasks.","pith_extraction_headline":"Layer-wise dynamics in language models reveal performance signals beyond final representations."},"references":{"count":78,"sample":[{"doi":"","year":2008,"title":"Princeton University Press","work_id":"b00efaf2-cb0f-41ff-b747-f56be79d133b","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"The Falcon Series of Open Language Models","work_id":"9ef058cb-28ba-4128-b9b7-a707f2fd36b3","ref_index":2,"cited_arxiv_id":"2311.16867","is_internal_anchor":true},{"doi":"","year":2003,"title":"Laplacian eigenmaps for dimensionality reduction and data representation.Neural computation, 15(6):1373–1396","work_id":"ed1bd55d-4e5b-400a-8a7e-890c11077e4e","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2006,"title":"Manifold regularization: A geometric framework for learning from labeled and unlabeled examples.Journal of machine learning research, 7(11), 2006","work_id":"90040779-9acf-4503-887d-54c0ac930473","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2016,"title":"A full-text learning to rank dataset for medical information retrieval","work_id":"5d603f1c-c8f0-4adc-b6a8-052eaf2ce678","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":78,"snapshot_sha256":"0a5fdadda6775140c24377c3bcc5e479c74cf57452446e81a91d6569e112585e","internal_anchors":10},"formal_canon":{"evidence_count":2,"snapshot_sha256":"839a6326f7635f51ba8302bd0233610f2699c6505925dde487886fdb60276dc2"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.12714","created_at":"2026-05-18T03:09:49.499744+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.12714v1","created_at":"2026-05-18T03:09:49.499744+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12714","created_at":"2026-05-18T03:09:49.499744+00:00"},{"alias_kind":"pith_short_12","alias_value":"IJ3L5UAQ2NP5","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"IJ3L5UAQ2NP5GQFZ","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"IJ3L5UAQ","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB","json":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB.json","graph_json":"https://pith.science/api/pith-number/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/graph.json","events_json":"https://pith.science/api/pith-number/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/events.json","paper":"https://pith.science/paper/IJ3L5UAQ"},"agent_actions":{"view_html":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB","download_json":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB.json","view_paper":"https://pith.science/paper/IJ3L5UAQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.12714&json=true","fetch_graph":"https://pith.science/api/pith-number/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/graph.json","fetch_events":"https://pith.science/api/pith-number/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/action/storage_attestation","attest_author":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/action/author_attestation","sign_citation":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/action/citation_signature","submit_replication":"https://pith.science/pith/IJ3L5UAQ2NP5GQFZE7CMPQAAGB/action/replication_record"}},"created_at":"2026-05-18T03:09:49.499744+00:00","updated_at":"2026-05-18T03:09:49.499744+00:00"}