{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:HAMJZSQVKL4VHWOAINCHUE26II","short_pith_number":"pith:HAMJZSQV","canonical_record":{"source":{"id":"2402.13753","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-21T12:30:33Z","cross_cats_sorted":[],"title_canon_sha256":"46e4af97439ff2209ef39f593912fe97c0c8c33415282cb5b0df3aafaf04abf8","abstract_canon_sha256":"0379a2839dbbe711aa7363393e5effbbca314a73f949a18ec961afaed67dfb0f"},"schema_version":"1.0"},"canonical_sha256":"38189cca1552f953d9c043447a135e420636aa9e5d0a13a0ca99322e4750d280","source":{"kind":"arxiv","id":"2402.13753","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.13753","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2402.13753v1","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.13753","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"HAMJZSQVKL4V","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"HAMJZSQVKL4VHWOA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"HAMJZSQV","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:HAMJZSQVKL4VHWOAINCHUE26II","target":"record","payload":{"canonical_record":{"source":{"id":"2402.13753","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-21T12:30:33Z","cross_cats_sorted":[],"title_canon_sha256":"46e4af97439ff2209ef39f593912fe97c0c8c33415282cb5b0df3aafaf04abf8","abstract_canon_sha256":"0379a2839dbbe711aa7363393e5effbbca314a73f949a18ec961afaed67dfb0f"},"schema_version":"1.0"},"canonical_sha256":"38189cca1552f953d9c043447a135e420636aa9e5d0a13a0ca99322e4750d280","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:53.019823Z","signature_b64":"p9LZId8qixz4wed1Z7pJXaxf/8BRnyoaETcm8ARr6k5FxWyQfWF/N16iogOPGzhILUglFHIiH6Xj4VELzxTHCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"38189cca1552f953d9c043447a135e420636aa9e5d0a13a0ca99322e4750d280","last_reissued_at":"2026-05-17T23:38:53.019275Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:53.019275Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2402.13753","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hEjSQ1ZYGliBcYuHGi0jzKNr7mUqB+EGpgdibdz6LkClwzDi2MOcimUgtp3yw8qqvzYQzG6U45G7JIbhtttcAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:59:45.038736Z"},"content_sha256":"b4836c4e150cfc6fdf7eacccd8c04b62637ad68dd62e4e6536ccfd92a15f0075","schema_version":"1.0","event_id":"sha256:b4836c4e150cfc6fdf7eacccd8c04b62637ad68dd62e4e6536ccfd92a15f0075"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:HAMJZSQVKL4VHWOAINCHUE26II","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"LongRoPE: Extending LLM Context Window Beyond 2 Million Tokens","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chengruidong Zhang, Fan Yang, Jiahang Xu, Li Lyna Zhang, Mao Yang, Ning Shang, Yiran Ding, Yuanyuan Xu","submitted_at":"2024-02-21T12:30:33Z","abstract_excerpt":"Large context window is a desirable feature in large language models (LLMs). However, due to high fine-tuning costs, scarcity of long texts, and catastrophic values introduced by new token positions, current extended context windows are limited to around 128k tokens. This paper introduces LongRoPE that, for the first time, extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window. This is achieved by three key innovations: (i) we identify "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"LongRoPE extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The two forms of non-uniformities in positional interpolation identified via efficient search are generalizable across models and tasks and provide a stable initialization that does not overfit to the search data.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"LongRoPE extends LLM context windows to 2048k tokens via search for non-uniform positional interpolation, progressive fine-tuning from 256k, and short-context readjustment.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"ae9240a7ec5d1b526adafcf866f0ababd68b9c70aaf16e645576e810486a721a"},"source":{"id":"2402.13753","kind":"arxiv","version":1},"verdict":{"id":"5f8907da-40cb-4089-9569-b9f66b67b1ef","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T08:26:12.371361Z","strongest_claim":"LongRoPE extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window.","one_line_summary":"LongRoPE extends LLM context windows to 2048k tokens via search for non-uniform positional interpolation, progressive fine-tuning from 256k, and short-context readjustment.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The two forms of non-uniformities in positional interpolation identified via efficient search are generalizable across models and tasks and provide a stable initialization that does not overfit to the search data.","pith_extraction_headline":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process."},"references":{"count":15,"sample":[{"doi":"","year":null,"title":"Extending Context Window of Large Language Models via Positional Interpolation","work_id":"c8b6df85-e7da-4bd8-90a4-d309cc2a0f60","ref_index":1,"cited_arxiv_id":"2306.15595","is_internal_anchor":true},{"doi":"","year":null,"title":"The Pile: An 800GB Dataset of Diverse Text for Language Modeling","work_id":"9b10667a-da61-4358-aceb-10578234d45d","ref_index":2,"cited_arxiv_id":"2101.00027","is_internal_anchor":true},{"doi":"","year":2020,"title":"Single path one-shot neural architecture search with uniform sampling","work_id":"ff23255d-764c-4c95-b81c-58af1380a665","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Lm-infinite: Simple on-the-fly length generalization for large language models","work_id":"62f73354-8046-4134-b4e3-1bfab8df2d99","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2009,"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","ref_index":5,"cited_arxiv_id":"2009.03300","is_internal_anchor":true}],"resolved_work":15,"snapshot_sha256":"3b493439f6cdafeea298cb4f5c718ef82b2281f1301109427045f495d9c668bc","internal_anchors":6},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a63ce17c0d4a95df8d33c6089c52946c3f888c31ef5a4860fb117ea08d23d5ca"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"5f8907da-40cb-4089-9569-b9f66b67b1ef"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BcaWVpG5p/ldmIZfzAVlht2eOPLHcgf4KZq4lkCL60q8Ox73nNtGX/Ne2+W+qTbMCbzBC2Rwkx7PHDr2doyZBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T16:59:45.039800Z"},"content_sha256":"7b22a4f8151c9ae2a193f9f532c9ebfcf0e68eb212590c1167d4da1aaa03f0ad","schema_version":"1.0","event_id":"sha256:7b22a4f8151c9ae2a193f9f532c9ebfcf0e68eb212590c1167d4da1aaa03f0ad"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HAMJZSQVKL4VHWOAINCHUE26II/bundle.json","state_url":"https://pith.science/pith/HAMJZSQVKL4VHWOAINCHUE26II/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HAMJZSQVKL4VHWOAINCHUE26II/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T16:59:45Z","links":{"resolver":"https://pith.science/pith/HAMJZSQVKL4VHWOAINCHUE26II","bundle":"https://pith.science/pith/HAMJZSQVKL4VHWOAINCHUE26II/bundle.json","state":"https://pith.science/pith/HAMJZSQVKL4VHWOAINCHUE26II/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HAMJZSQVKL4VHWOAINCHUE26II/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:HAMJZSQVKL4VHWOAINCHUE26II","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0379a2839dbbe711aa7363393e5effbbca314a73f949a18ec961afaed67dfb0f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-21T12:30:33Z","title_canon_sha256":"46e4af97439ff2209ef39f593912fe97c0c8c33415282cb5b0df3aafaf04abf8"},"schema_version":"1.0","source":{"id":"2402.13753","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.13753","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2402.13753v1","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.13753","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"HAMJZSQVKL4V","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"HAMJZSQVKL4VHWOA","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"HAMJZSQV","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:7b22a4f8151c9ae2a193f9f532c9ebfcf0e68eb212590c1167d4da1aaa03f0ad","target":"graph","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"LongRoPE extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The two forms of non-uniformities in positional interpolation identified via efficient search are generalizable across models and tasks and provide a stable initialization that does not overfit to the search data."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"LongRoPE extends LLM context windows to 2048k tokens via search for non-uniform positional interpolation, progressive fine-tuning from 256k, and short-context readjustment."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process."}],"snapshot_sha256":"ae9240a7ec5d1b526adafcf866f0ababd68b9c70aaf16e645576e810486a721a"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"a63ce17c0d4a95df8d33c6089c52946c3f888c31ef5a4860fb117ea08d23d5ca"},"paper":{"abstract_excerpt":"Large context window is a desirable feature in large language models (LLMs). However, due to high fine-tuning costs, scarcity of long texts, and catastrophic values introduced by new token positions, current extended context windows are limited to around 128k tokens. This paper introduces LongRoPE that, for the first time, extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window. This is achieved by three key innovations: (i) we identify ","authors_text":"Chengruidong Zhang, Fan Yang, Jiahang Xu, Li Lyna Zhang, Mao Yang, Ning Shang, Yiran Ding, Yuanyuan Xu","cross_cats":[],"headline":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-21T12:30:33Z","title":"LongRoPE: Extending LLM Context Window Beyond 2 Million Tokens"},"references":{"count":15,"internal_anchors":6,"resolved_work":15,"sample":[{"cited_arxiv_id":"2306.15595","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Extending Context Window of Large Language Models via Positional Interpolation","work_id":"c8b6df85-e7da-4bd8-90a4-d309cc2a0f60","year":null},{"cited_arxiv_id":"2101.00027","doi":"","is_internal_anchor":true,"ref_index":2,"title":"The Pile: An 800GB Dataset of Diverse Text for Language Modeling","work_id":"9b10667a-da61-4358-aceb-10578234d45d","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Single path one-shot neural architecture search with uniform sampling","work_id":"ff23255d-764c-4c95-b81c-58af1380a665","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Lm-infinite: Simple on-the-fly length generalization for large language models","work_id":"62f73354-8046-4134-b4e3-1bfab8df2d99","year":null},{"cited_arxiv_id":"2009.03300","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","year":2009}],"snapshot_sha256":"3b493439f6cdafeea298cb4f5c718ef82b2281f1301109427045f495d9c668bc"},"source":{"id":"2402.13753","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T08:26:12.371361Z","id":"5f8907da-40cb-4089-9569-b9f66b67b1ef","model_set":{"reader":"grok-4.3"},"one_line_summary":"LongRoPE extends LLM context windows to 2048k tokens via search for non-uniform positional interpolation, progressive fine-tuning from 256k, and short-context readjustment.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"LongRoPE extends pre-trained LLMs to 2048k token contexts via targeted non-uniform positional interpolation and a two-stage fine-tuning process.","strongest_claim":"LongRoPE extends the context window of pre-trained LLMs to an impressive 2048k tokens, with up to only 1k fine-tuning steps at within 256k training lengths, while maintaining performance at the original short context window.","weakest_assumption":"The two forms of non-uniformities in positional interpolation identified via efficient search are generalizable across models and tasks and provide a stable initialization that does not overfit to the search data."}},"verdict_id":"5f8907da-40cb-4089-9569-b9f66b67b1ef"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b4836c4e150cfc6fdf7eacccd8c04b62637ad68dd62e4e6536ccfd92a15f0075","target":"record","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0379a2839dbbe711aa7363393e5effbbca314a73f949a18ec961afaed67dfb0f","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-21T12:30:33Z","title_canon_sha256":"46e4af97439ff2209ef39f593912fe97c0c8c33415282cb5b0df3aafaf04abf8"},"schema_version":"1.0","source":{"id":"2402.13753","kind":"arxiv","version":1}},"canonical_sha256":"38189cca1552f953d9c043447a135e420636aa9e5d0a13a0ca99322e4750d280","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"38189cca1552f953d9c043447a135e420636aa9e5d0a13a0ca99322e4750d280","first_computed_at":"2026-05-17T23:38:53.019275Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:53.019275Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"p9LZId8qixz4wed1Z7pJXaxf/8BRnyoaETcm8ARr6k5FxWyQfWF/N16iogOPGzhILUglFHIiH6Xj4VELzxTHCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:53.019823Z","signed_message":"canonical_sha256_bytes"},"source_id":"2402.13753","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b4836c4e150cfc6fdf7eacccd8c04b62637ad68dd62e4e6536ccfd92a15f0075","sha256:7b22a4f8151c9ae2a193f9f532c9ebfcf0e68eb212590c1167d4da1aaa03f0ad"],"state_sha256":"6e751071118b003d6e0367aae6b467bdb00bb11da70587b2f27df2d9ed762703"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iUlw9vu51NAS07WJ8svLLmUvObtsFYQ3zftxp8lr5szTBEUjYihS0o1TlJ++0wZBayIVkSwsfQPoYAC/flaKCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T16:59:45.045409Z","bundle_sha256":"7ce8d342fd9ca1b602a2d443a207db02ea8246ef92f20f18fe9422828d08f368"}}