{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:SYBJDHAI36UQ5LGRAAHY764TTX","short_pith_number":"pith:SYBJDHAI","canonical_record":{"source":{"id":"2602.00520","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-31T05:21:27Z","cross_cats_sorted":[],"title_canon_sha256":"bbc190419284b0fd364101cfa0d42c85d71fc8d28e5bda0f21b376872e54b45e","abstract_canon_sha256":"e9262779e95c0b42d82984f39f1cfc416ace514913b0ae6c3bacefbeaa30a479"},"schema_version":"1.0"},"canonical_sha256":"9602919c08dfa90eacd1000f8ffb939de37abd5e37724a5b5b0eb295d9112f56","source":{"kind":"arxiv","id":"2602.00520","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.00520","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"arxiv_version","alias_value":"2602.00520v3","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.00520","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"pith_short_12","alias_value":"SYBJDHAI36UQ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"SYBJDHAI36UQ5LGR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"SYBJDHAI","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:SYBJDHAI36UQ5LGRAAHY764TTX","target":"record","payload":{"canonical_record":{"source":{"id":"2602.00520","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-31T05:21:27Z","cross_cats_sorted":[],"title_canon_sha256":"bbc190419284b0fd364101cfa0d42c85d71fc8d28e5bda0f21b376872e54b45e","abstract_canon_sha256":"e9262779e95c0b42d82984f39f1cfc416ace514913b0ae6c3bacefbeaa30a479"},"schema_version":"1.0"},"canonical_sha256":"9602919c08dfa90eacd1000f8ffb939de37abd5e37724a5b5b0eb295d9112f56","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:16.484024Z","signature_b64":"spnI++dsYCDKvA3SYKHutr0fiJmPinw4Pa702e1DbDaWMJvCKWmDO4sFQSjLCjvb4FJ/BH3D4q+UFPWqEtf5BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9602919c08dfa90eacd1000f8ffb939de37abd5e37724a5b5b0eb295d9112f56","last_reissued_at":"2026-05-17T23:39:16.483199Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:16.483199Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.00520","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0cm/+bQGSXNrca6Vm9PBFsgPfxMKw212fgjpWT0hwkwExb3lKLp85jDJdVMccp9tfVGq5MpRWe9PGQ0rHtM2Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T18:21:18.261097Z"},"content_sha256":"c76061c73cd2079a2d83efdbb6e45fff7e3685ac2ec11433fc98dc0c8eff520c","schema_version":"1.0","event_id":"sha256:c76061c73cd2079a2d83efdbb6e45fff7e3685ac2ec11433fc98dc0c8eff520c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:SYBJDHAI36UQ5LGRAAHY764TTX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"NEST: Nested Event Stream Transformer for Sequences of Multisets","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Benjamin Goldstein, Haoyu Gong, Jillian Hurst, Matthew Engelhard, Minghui Sun, Xingyu You","submitted_at":"2026-01-31T05:21:27Z","abstract_excerpt":"Event stream data often exhibit hierarchical structure in which multiple events co-occur, resulting in a sequence of multisets (i.e., bags of events). In electronic health records (EHRs), for example, medical events are grouped into a sequence of clinical encounters with well-defined temporal structure, but the order and timing of events within each encounter may be unknown or unreliable. Most existing foundation models (FMs) for event stream data flatten this hierarchy into a one-dimensional sequence, leading to (i) computational inefficiency associated with dense attention and learning spuri"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"preserving the original hierarchy in the FM architecture provides a useful inductive bias that improves both computational efficiency and representation quality.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That flattening the hierarchy into a one-dimensional sequence necessarily creates spurious within-set relationships and that the original multiset structure supplies a reliable inductive bias.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"NEST is a nested transformer for sequences of multisets that uses masked set modeling to learn improved set-level representations from hierarchical event streams like EHRs.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4e591ecf95a49603d98db895cc4051e195bef720f79594ebd0e832417a2dbb2a"},"source":{"id":"2602.00520","kind":"arxiv","version":3},"verdict":{"id":"8074d50b-c0cd-498a-a0cb-52379f03aeae","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T09:18:03.572484Z","strongest_claim":"preserving the original hierarchy in the FM architecture provides a useful inductive bias that improves both computational efficiency and representation quality.","one_line_summary":"NEST is a nested transformer for sequences of multisets that uses masked set modeling to learn improved set-level representations from hierarchical event streams like EHRs.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That flattening the hierarchy into a one-dimensional sequence necessarily creates spurious within-set relationships and that the original multiset structure supplies a reliable inductive bias.","pith_extraction_headline":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models."},"references":{"count":42,"sample":[{"doi":"","year":2004,"title":"Longformer: The Long-Document Transformer","work_id":"abea7a44-6668-4de7-aab6-f53a6e5aa088","ref_index":1,"cited_arxiv_id":"2004.05150","is_internal_anchor":true},{"doi":"","year":2019,"title":"Neural legal judgment prediction in english","work_id":"7c16eeff-cd4c-4888-9dc6-e7b9aaf92c33","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"An exploration of hierarchical attention transformers for efficient long document classification.arXiv preprint arXiv:2210.05529, 2022","work_id":"9e415f65-8d57-4d76-9012-917245fe541e","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"Diffcse: Difference-based contrastive learning for sentence embeddings","work_id":"0fdaadfa-14f9-4791-9e4a-818f2ce9b228","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2019,"title":"Bert: Pre-training of deep bidi- rectional transformers for language understanding","work_id":"1809bfb7-594d-445a-a5e7-fb928992f39d","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":42,"snapshot_sha256":"2fd41879f04057fa1f8b59d62546721642d395f35bed109857761888ad0957b9","internal_anchors":3},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8074d50b-c0cd-498a-a0cb-52379f03aeae"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SCrMrxKJpkr6rPKTm+DHDCxiq94seQ+THYVraRTauIFHW2YrDz2tQG9oWznoWf+9EPZM26CMUrPo3kemjALCBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T18:21:18.262049Z"},"content_sha256":"ba3eabf2b1eef0642cd4fd71a395383b1701a7bf36ff362de0218fde6ebddc65","schema_version":"1.0","event_id":"sha256:ba3eabf2b1eef0642cd4fd71a395383b1701a7bf36ff362de0218fde6ebddc65"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SYBJDHAI36UQ5LGRAAHY764TTX/bundle.json","state_url":"https://pith.science/pith/SYBJDHAI36UQ5LGRAAHY764TTX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SYBJDHAI36UQ5LGRAAHY764TTX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T18:21:18Z","links":{"resolver":"https://pith.science/pith/SYBJDHAI36UQ5LGRAAHY764TTX","bundle":"https://pith.science/pith/SYBJDHAI36UQ5LGRAAHY764TTX/bundle.json","state":"https://pith.science/pith/SYBJDHAI36UQ5LGRAAHY764TTX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SYBJDHAI36UQ5LGRAAHY764TTX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SYBJDHAI36UQ5LGRAAHY764TTX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e9262779e95c0b42d82984f39f1cfc416ace514913b0ae6c3bacefbeaa30a479","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-31T05:21:27Z","title_canon_sha256":"bbc190419284b0fd364101cfa0d42c85d71fc8d28e5bda0f21b376872e54b45e"},"schema_version":"1.0","source":{"id":"2602.00520","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.00520","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"arxiv_version","alias_value":"2602.00520v3","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.00520","created_at":"2026-05-17T23:39:16Z"},{"alias_kind":"pith_short_12","alias_value":"SYBJDHAI36UQ","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"SYBJDHAI36UQ5LGR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"SYBJDHAI","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:ba3eabf2b1eef0642cd4fd71a395383b1701a7bf36ff362de0218fde6ebddc65","target":"graph","created_at":"2026-05-17T23:39:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"preserving the original hierarchy in the FM architecture provides a useful inductive bias that improves both computational efficiency and representation quality."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That flattening the hierarchy into a one-dimensional sequence necessarily creates spurious within-set relationships and that the original multiset structure supplies a reliable inductive bias."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"NEST is a nested transformer for sequences of multisets that uses masked set modeling to learn improved set-level representations from hierarchical event streams like EHRs."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models."}],"snapshot_sha256":"4e591ecf95a49603d98db895cc4051e195bef720f79594ebd0e832417a2dbb2a"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Event stream data often exhibit hierarchical structure in which multiple events co-occur, resulting in a sequence of multisets (i.e., bags of events). In electronic health records (EHRs), for example, medical events are grouped into a sequence of clinical encounters with well-defined temporal structure, but the order and timing of events within each encounter may be unknown or unreliable. Most existing foundation models (FMs) for event stream data flatten this hierarchy into a one-dimensional sequence, leading to (i) computational inefficiency associated with dense attention and learning spuri","authors_text":"Benjamin Goldstein, Haoyu Gong, Jillian Hurst, Matthew Engelhard, Minghui Sun, Xingyu You","cross_cats":[],"headline":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-31T05:21:27Z","title":"NEST: Nested Event Stream Transformer for Sequences of Multisets"},"references":{"count":42,"internal_anchors":3,"resolved_work":42,"sample":[{"cited_arxiv_id":"2004.05150","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Longformer: The Long-Document Transformer","work_id":"abea7a44-6668-4de7-aab6-f53a6e5aa088","year":2004},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Neural legal judgment prediction in english","work_id":"7c16eeff-cd4c-4888-9dc6-e7b9aaf92c33","year":2019},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"An exploration of hierarchical attention transformers for efficient long document classification.arXiv preprint arXiv:2210.05529, 2022","work_id":"9e415f65-8d57-4d76-9012-917245fe541e","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Diffcse: Difference-based contrastive learning for sentence embeddings","work_id":"0fdaadfa-14f9-4791-9e4a-818f2ce9b228","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Bert: Pre-training of deep bidi- rectional transformers for language understanding","work_id":"1809bfb7-594d-445a-a5e7-fb928992f39d","year":2019}],"snapshot_sha256":"2fd41879f04057fa1f8b59d62546721642d395f35bed109857761888ad0957b9"},"source":{"id":"2602.00520","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-16T09:18:03.572484Z","id":"8074d50b-c0cd-498a-a0cb-52379f03aeae","model_set":{"reader":"grok-4.3"},"one_line_summary":"NEST is a nested transformer for sequences of multisets that uses masked set modeling to learn improved set-level representations from hierarchical event streams like EHRs.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Preserving the original hierarchy of event streams as sequences of multisets improves both computational efficiency and representation quality in foundation models.","strongest_claim":"preserving the original hierarchy in the FM architecture provides a useful inductive bias that improves both computational efficiency and representation quality.","weakest_assumption":"That flattening the hierarchy into a one-dimensional sequence necessarily creates spurious within-set relationships and that the original multiset structure supplies a reliable inductive bias."}},"verdict_id":"8074d50b-c0cd-498a-a0cb-52379f03aeae"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c76061c73cd2079a2d83efdbb6e45fff7e3685ac2ec11433fc98dc0c8eff520c","target":"record","created_at":"2026-05-17T23:39:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e9262779e95c0b42d82984f39f1cfc416ace514913b0ae6c3bacefbeaa30a479","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-01-31T05:21:27Z","title_canon_sha256":"bbc190419284b0fd364101cfa0d42c85d71fc8d28e5bda0f21b376872e54b45e"},"schema_version":"1.0","source":{"id":"2602.00520","kind":"arxiv","version":3}},"canonical_sha256":"9602919c08dfa90eacd1000f8ffb939de37abd5e37724a5b5b0eb295d9112f56","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9602919c08dfa90eacd1000f8ffb939de37abd5e37724a5b5b0eb295d9112f56","first_computed_at":"2026-05-17T23:39:16.483199Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:16.483199Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"spnI++dsYCDKvA3SYKHutr0fiJmPinw4Pa702e1DbDaWMJvCKWmDO4sFQSjLCjvb4FJ/BH3D4q+UFPWqEtf5BA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:16.484024Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.00520","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c76061c73cd2079a2d83efdbb6e45fff7e3685ac2ec11433fc98dc0c8eff520c","sha256:ba3eabf2b1eef0642cd4fd71a395383b1701a7bf36ff362de0218fde6ebddc65"],"state_sha256":"44a74bd2c7e7d46b0888cb74b3f1179d7e0a7d44d86a4c50d45270f080404761"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Aq5KJfSutsOF9KEf95ztLqipsgP8Ba6yRfTfBkR5cK7x24Bg5atA4tY7U/sgvg0gZBlJZwx5iu/aEDZh6W1SDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T18:21:18.265904Z","bundle_sha256":"02f1b7de7b0fc2c20cdad8930fcddab15f788d1604753d42df05047b349aefc7"}}