{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:A6KF5IVYSAPCLM546R3RS2ZRCM","short_pith_number":"pith:A6KF5IVY","canonical_record":{"source":{"id":"2510.06048","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-07T15:42:33Z","cross_cats_sorted":[],"title_canon_sha256":"9408559d7bd3eac75c4bef511f997ef1996f1eb69c1c25fbd5b4fe07c32e90ba","abstract_canon_sha256":"d966dbe7bb0ddaeca0f3a6f75f0f119e43cc5854dbad0bd692e844cf57bc24b4"},"schema_version":"1.0"},"canonical_sha256":"07945ea2b8901e25b3bcf477196b31131a03ff18b6cf6a6046f64afcf1871783","source":{"kind":"arxiv","id":"2510.06048","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.06048","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"arxiv_version","alias_value":"2510.06048v4","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.06048","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_12","alias_value":"A6KF5IVYSAPC","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_16","alias_value":"A6KF5IVYSAPCLM54","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_8","alias_value":"A6KF5IVY","created_at":"2026-06-02T02:04:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:A6KF5IVYSAPCLM546R3RS2ZRCM","target":"record","payload":{"canonical_record":{"source":{"id":"2510.06048","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-07T15:42:33Z","cross_cats_sorted":[],"title_canon_sha256":"9408559d7bd3eac75c4bef511f997ef1996f1eb69c1c25fbd5b4fe07c32e90ba","abstract_canon_sha256":"d966dbe7bb0ddaeca0f3a6f75f0f119e43cc5854dbad0bd692e844cf57bc24b4"},"schema_version":"1.0"},"canonical_sha256":"07945ea2b8901e25b3bcf477196b31131a03ff18b6cf6a6046f64afcf1871783","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:09.788108Z","signature_b64":"YBXBh99GXCaJi4+IMu3G2iNBjKhJDEjf2E1SkJNz7x05Vm16ipkKIAvYx4rSIwWqsm5scQfSwcdfxh5N8oSLBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"07945ea2b8901e25b3bcf477196b31131a03ff18b6cf6a6046f64afcf1871783","last_reissued_at":"2026-06-02T02:04:09.787646Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:09.787646Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.06048","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qk5jiYd/1//ILnKd3qp1TBCQ5VToxWW7JWjCPp5M2bXnroWTQrKF2AifGoPRTDSTEfsg8lfnJpLHdbNuxF8JAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T03:50:02.444409Z"},"content_sha256":"4cef0befed76270314ddf8f9b3f80f15a0beba7fc0ed786993276c5b3980a06b","schema_version":"1.0","event_id":"sha256:4cef0befed76270314ddf8f9b3f80f15a0beba7fc0ed786993276c5b3980a06b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:A6KF5IVYSAPCLM546R3RS2ZRCM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"BLISS: A Lightweight Bilevel Influence Scoring Method for Data Selection in Language Model Pretraining","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Huixia Wang, Jie Hao, Jie Xu, Mingrui Liu, Rui Yu, Wei Zhang","submitted_at":"2025-10-07T15:42:33Z","abstract_excerpt":"Effective data selection is essential for pretraining large language models (LLMs), enhancing efficiency and improving generalization to downstream tasks. However, existing approaches often require leveraging external pretrained models, making it difficult to disentangle the effects of data selection from those of the external pretrained models. In addition, they often overlook the long-term impact of selected data if the model is trained to convergence, primarily due to the prohibitive cost of full-scale LLM pretraining. In this paper, we introduce BLISS (\\textbf{B}ileve\\textbf{L} \\textbf{I}n"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.06048","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.06048/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xwHSoYI76GP0AaE/jvn4DMYbIgYbY6xjew5o0sr1lKtfg2dmppKtZC9Yq4HJMzl8cE69VaY8YSk0awV8feb3Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T03:50:02.445207Z"},"content_sha256":"5d9dc44c2070e0bc65c190c84ec9ad8d33cd505139df5e1241953bf2da8000bc","schema_version":"1.0","event_id":"sha256:5d9dc44c2070e0bc65c190c84ec9ad8d33cd505139df5e1241953bf2da8000bc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/bundle.json","state_url":"https://pith.science/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T03:50:02Z","links":{"resolver":"https://pith.science/pith/A6KF5IVYSAPCLM546R3RS2ZRCM","bundle":"https://pith.science/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/bundle.json","state":"https://pith.science/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/A6KF5IVYSAPCLM546R3RS2ZRCM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:A6KF5IVYSAPCLM546R3RS2ZRCM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d966dbe7bb0ddaeca0f3a6f75f0f119e43cc5854dbad0bd692e844cf57bc24b4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-07T15:42:33Z","title_canon_sha256":"9408559d7bd3eac75c4bef511f997ef1996f1eb69c1c25fbd5b4fe07c32e90ba"},"schema_version":"1.0","source":{"id":"2510.06048","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.06048","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"arxiv_version","alias_value":"2510.06048v4","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.06048","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_12","alias_value":"A6KF5IVYSAPC","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_16","alias_value":"A6KF5IVYSAPCLM54","created_at":"2026-06-02T02:04:09Z"},{"alias_kind":"pith_short_8","alias_value":"A6KF5IVY","created_at":"2026-06-02T02:04:09Z"}],"graph_snapshots":[{"event_id":"sha256:5d9dc44c2070e0bc65c190c84ec9ad8d33cd505139df5e1241953bf2da8000bc","target":"graph","created_at":"2026-06-02T02:04:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.06048/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Effective data selection is essential for pretraining large language models (LLMs), enhancing efficiency and improving generalization to downstream tasks. However, existing approaches often require leveraging external pretrained models, making it difficult to disentangle the effects of data selection from those of the external pretrained models. In addition, they often overlook the long-term impact of selected data if the model is trained to convergence, primarily due to the prohibitive cost of full-scale LLM pretraining. In this paper, we introduce BLISS (\\textbf{B}ileve\\textbf{L} \\textbf{I}n","authors_text":"Huixia Wang, Jie Hao, Jie Xu, Mingrui Liu, Rui Yu, Wei Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-07T15:42:33Z","title":"BLISS: A Lightweight Bilevel Influence Scoring Method for Data Selection in Language Model Pretraining"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.06048","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4cef0befed76270314ddf8f9b3f80f15a0beba7fc0ed786993276c5b3980a06b","target":"record","created_at":"2026-06-02T02:04:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d966dbe7bb0ddaeca0f3a6f75f0f119e43cc5854dbad0bd692e844cf57bc24b4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-07T15:42:33Z","title_canon_sha256":"9408559d7bd3eac75c4bef511f997ef1996f1eb69c1c25fbd5b4fe07c32e90ba"},"schema_version":"1.0","source":{"id":"2510.06048","kind":"arxiv","version":4}},"canonical_sha256":"07945ea2b8901e25b3bcf477196b31131a03ff18b6cf6a6046f64afcf1871783","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"07945ea2b8901e25b3bcf477196b31131a03ff18b6cf6a6046f64afcf1871783","first_computed_at":"2026-06-02T02:04:09.787646Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:04:09.787646Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YBXBh99GXCaJi4+IMu3G2iNBjKhJDEjf2E1SkJNz7x05Vm16ipkKIAvYx4rSIwWqsm5scQfSwcdfxh5N8oSLBg==","signature_status":"signed_v1","signed_at":"2026-06-02T02:04:09.788108Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.06048","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4cef0befed76270314ddf8f9b3f80f15a0beba7fc0ed786993276c5b3980a06b","sha256:5d9dc44c2070e0bc65c190c84ec9ad8d33cd505139df5e1241953bf2da8000bc"],"state_sha256":"01d9902e0f4ff7c2af3c55ec340edfed47886daef58f9e643c1d014c36b3edc5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Og83R8jqxKdXUo7QUNjYr9T2XFkeFrd83K/qXNT12ZohX4Q4G+zZP1M0+91Hc2NnTQNaZc5YACIRvV4n67pzDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T03:50:02.449979Z","bundle_sha256":"1944417f15d06358440a5b26b566f3c353d95950362341c95908c0bca173b802"}}