{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:L46JMFBTVBZYUY2RTP5ZTQFEUV","short_pith_number":"pith:L46JMFBT","schema_version":"1.0","canonical_sha256":"5f3c961433a8738a63519bfb99c0a4a5770456f293be53d89f5e1b075557401a","source":{"kind":"arxiv","id":"2606.30460","version":1},"attestation_state":"computed","paper":{"title":"HSAP: A Hierachical Sequence-aware Parallelism for Hybrid-Context Generative Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.LG","authors_text":"Bingyi Jing, Cong Lin, Jiaxing Zhang, Junyu Lu, Songxin Zhang, Zejian Xie, Zhuoyang Song","submitted_at":"2026-06-29T15:26:55Z","abstract_excerpt":"In this paper, we aim to combine the advantages of existing sequence parallelism paradigms and overcomes their drawbacks, the most serious of which is the incapability to correctly compute causal attention on the hybrid-context packed sequences, in a stronger sequence parallelism framework. The practical technique of packing sequences for efficiently pretraining and fine-tuning large language models causes cross-contamination problem in attention computation, which can be effectively solved when no parallelism in the sequence length dimension is taken. However, in sequence parallelism, existin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.30460","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-29T15:26:55Z","cross_cats_sorted":["cs.DC"],"title_canon_sha256":"46078cacac4555b770fa2ce63b0ae4f258dcb0655fbdb95e68f7e2d4aa3eb1ce","abstract_canon_sha256":"789531d1e2c0625601a5a5e2c0cbbf6a847b0e7b2cfed6d9885b5b0abd14f2fd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T02:18:16.284440Z","signature_b64":"gSiWpAwI5OtX9oM9afJ/EfikVJLhYckNPCczp+yI+iOxWGo9fo8J+K7K1Yfuf0Y/jCFeQleshuwLM96c4Xg6Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5f3c961433a8738a63519bfb99c0a4a5770456f293be53d89f5e1b075557401a","last_reissued_at":"2026-06-30T02:18:16.283969Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T02:18:16.283969Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"HSAP: A Hierachical Sequence-aware Parallelism for Hybrid-Context Generative Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DC"],"primary_cat":"cs.LG","authors_text":"Bingyi Jing, Cong Lin, Jiaxing Zhang, Junyu Lu, Songxin Zhang, Zejian Xie, Zhuoyang Song","submitted_at":"2026-06-29T15:26:55Z","abstract_excerpt":"In this paper, we aim to combine the advantages of existing sequence parallelism paradigms and overcomes their drawbacks, the most serious of which is the incapability to correctly compute causal attention on the hybrid-context packed sequences, in a stronger sequence parallelism framework. The practical technique of packing sequences for efficiently pretraining and fine-tuning large language models causes cross-contamination problem in attention computation, which can be effectively solved when no parallelism in the sequence length dimension is taken. However, in sequence parallelism, existin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.30460","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.30460/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.30460","created_at":"2026-06-30T02:18:16.284046+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.30460v1","created_at":"2026-06-30T02:18:16.284046+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.30460","created_at":"2026-06-30T02:18:16.284046+00:00"},{"alias_kind":"pith_short_12","alias_value":"L46JMFBTVBZY","created_at":"2026-06-30T02:18:16.284046+00:00"},{"alias_kind":"pith_short_16","alias_value":"L46JMFBTVBZYUY2R","created_at":"2026-06-30T02:18:16.284046+00:00"},{"alias_kind":"pith_short_8","alias_value":"L46JMFBT","created_at":"2026-06-30T02:18:16.284046+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV","json":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV.json","graph_json":"https://pith.science/api/pith-number/L46JMFBTVBZYUY2RTP5ZTQFEUV/graph.json","events_json":"https://pith.science/api/pith-number/L46JMFBTVBZYUY2RTP5ZTQFEUV/events.json","paper":"https://pith.science/paper/L46JMFBT"},"agent_actions":{"view_html":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV","download_json":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV.json","view_paper":"https://pith.science/paper/L46JMFBT","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.30460&json=true","fetch_graph":"https://pith.science/api/pith-number/L46JMFBTVBZYUY2RTP5ZTQFEUV/graph.json","fetch_events":"https://pith.science/api/pith-number/L46JMFBTVBZYUY2RTP5ZTQFEUV/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV/action/storage_attestation","attest_author":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV/action/author_attestation","sign_citation":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV/action/citation_signature","submit_replication":"https://pith.science/pith/L46JMFBTVBZYUY2RTP5ZTQFEUV/action/replication_record"}},"created_at":"2026-06-30T02:18:16.284046+00:00","updated_at":"2026-06-30T02:18:16.284046+00:00"}