{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QJ4S33PEU5SO4656UQDCRRZINW","short_pith_number":"pith:QJ4S33PE","canonical_record":{"source":{"id":"2606.13732","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T11:49:39Z","cross_cats_sorted":[],"title_canon_sha256":"643531231c869862e5bc4e12ee7df19ccc0a6225ac71b7df56318012f7cd8224","abstract_canon_sha256":"5a2442bb8425a44e8b33d86bd49413c540a4b2ddf3e474ebd91231a938a66f44"},"schema_version":"1.0"},"canonical_sha256":"82792dede4a764ee7bbea40628c7286dbafb1d843169878e54ddb598b8eb5079","source":{"kind":"arxiv","id":"2606.13732","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.13732","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"arxiv_version","alias_value":"2606.13732v2","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13732","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_12","alias_value":"QJ4S33PEU5SO","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_16","alias_value":"QJ4S33PEU5SO4656","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_8","alias_value":"QJ4S33PE","created_at":"2026-07-03T01:17:23Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QJ4S33PEU5SO4656UQDCRRZINW","target":"record","payload":{"canonical_record":{"source":{"id":"2606.13732","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T11:49:39Z","cross_cats_sorted":[],"title_canon_sha256":"643531231c869862e5bc4e12ee7df19ccc0a6225ac71b7df56318012f7cd8224","abstract_canon_sha256":"5a2442bb8425a44e8b33d86bd49413c540a4b2ddf3e474ebd91231a938a66f44"},"schema_version":"1.0"},"canonical_sha256":"82792dede4a764ee7bbea40628c7286dbafb1d843169878e54ddb598b8eb5079","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:17:23.067764Z","signature_b64":"fLZMecgArKOgctLvli5OaY1917OObig+kwH3OR+wCbAVCvHAdTp6E8NKXoGsgKNl86RYoyodt+N/adUHo1xzBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"82792dede4a764ee7bbea40628c7286dbafb1d843169878e54ddb598b8eb5079","last_reissued_at":"2026-07-03T01:17:23.067272Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:17:23.067272Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.13732","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T01:17:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1tTSjpFbSiXzANAMP8owDs+1frzJPRZrLDCXDfyr0dkuEbdZ/lbu1o7sEbHyG1Aglub5+gTZaUWX+21vMchJCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T13:59:29.118589Z"},"content_sha256":"8b3d7f5d418060a74d6d8584973259637a462d6931d130da179d671dc543b1a6","schema_version":"1.0","event_id":"sha256:8b3d7f5d418060a74d6d8584973259637a462d6931d130da179d671dc543b1a6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QJ4S33PEU5SO4656UQDCRRZINW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"When Sample Selection Bias Precipitates Model Collapse","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Jingqi Zhang, Meng Zhang, Peihua Mai, Wei Liu, Xianglong Du, Xinbao Qiao, Yan Pang","submitted_at":"2026-06-11T11:49:39Z","abstract_excerpt":"The proliferation of recursive training on synthetic data can alleviate data scarcity but risks model collapse, where repeated training erodes distributional tails and homogenizes outputs. Data selection is widely viewed as a remedy, yet its reliability depends critically on the reference distribution used by the verifier. We show that in low-resource verification regimes, where each verifier observes only a small, fragmented, and biased slice of the target manifold, selection itself becomes biased. This situation naturally arises in low-resource data silos such as healthcare consortia or prop"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13732","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.13732/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T01:17:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bzA8XOK3QHYPUCRnOl5adao4LxQJmtwGs9tuS1ap4w52K307oJ3ZkXUAVLeA0xqDyT32tO3+YFtvQTLUAXXsDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T13:59:29.118956Z"},"content_sha256":"d9a902fc828da2ae58c134ae1325c7b312c692fd74c6bac4685bd8c5e4f8d163","schema_version":"1.0","event_id":"sha256:d9a902fc828da2ae58c134ae1325c7b312c692fd74c6bac4685bd8c5e4f8d163"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QJ4S33PEU5SO4656UQDCRRZINW/bundle.json","state_url":"https://pith.science/pith/QJ4S33PEU5SO4656UQDCRRZINW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QJ4S33PEU5SO4656UQDCRRZINW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T13:59:29Z","links":{"resolver":"https://pith.science/pith/QJ4S33PEU5SO4656UQDCRRZINW","bundle":"https://pith.science/pith/QJ4S33PEU5SO4656UQDCRRZINW/bundle.json","state":"https://pith.science/pith/QJ4S33PEU5SO4656UQDCRRZINW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QJ4S33PEU5SO4656UQDCRRZINW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QJ4S33PEU5SO4656UQDCRRZINW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5a2442bb8425a44e8b33d86bd49413c540a4b2ddf3e474ebd91231a938a66f44","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T11:49:39Z","title_canon_sha256":"643531231c869862e5bc4e12ee7df19ccc0a6225ac71b7df56318012f7cd8224"},"schema_version":"1.0","source":{"id":"2606.13732","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.13732","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"arxiv_version","alias_value":"2606.13732v2","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13732","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_12","alias_value":"QJ4S33PEU5SO","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_16","alias_value":"QJ4S33PEU5SO4656","created_at":"2026-07-03T01:17:23Z"},{"alias_kind":"pith_short_8","alias_value":"QJ4S33PE","created_at":"2026-07-03T01:17:23Z"}],"graph_snapshots":[{"event_id":"sha256:d9a902fc828da2ae58c134ae1325c7b312c692fd74c6bac4685bd8c5e4f8d163","target":"graph","created_at":"2026-07-03T01:17:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.13732/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The proliferation of recursive training on synthetic data can alleviate data scarcity but risks model collapse, where repeated training erodes distributional tails and homogenizes outputs. Data selection is widely viewed as a remedy, yet its reliability depends critically on the reference distribution used by the verifier. We show that in low-resource verification regimes, where each verifier observes only a small, fragmented, and biased slice of the target manifold, selection itself becomes biased. This situation naturally arises in low-resource data silos such as healthcare consortia or prop","authors_text":"Jingqi Zhang, Meng Zhang, Peihua Mai, Wei Liu, Xianglong Du, Xinbao Qiao, Yan Pang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T11:49:39Z","title":"When Sample Selection Bias Precipitates Model Collapse"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13732","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8b3d7f5d418060a74d6d8584973259637a462d6931d130da179d671dc543b1a6","target":"record","created_at":"2026-07-03T01:17:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5a2442bb8425a44e8b33d86bd49413c540a4b2ddf3e474ebd91231a938a66f44","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T11:49:39Z","title_canon_sha256":"643531231c869862e5bc4e12ee7df19ccc0a6225ac71b7df56318012f7cd8224"},"schema_version":"1.0","source":{"id":"2606.13732","kind":"arxiv","version":2}},"canonical_sha256":"82792dede4a764ee7bbea40628c7286dbafb1d843169878e54ddb598b8eb5079","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82792dede4a764ee7bbea40628c7286dbafb1d843169878e54ddb598b8eb5079","first_computed_at":"2026-07-03T01:17:23.067272Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:17:23.067272Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fLZMecgArKOgctLvli5OaY1917OObig+kwH3OR+wCbAVCvHAdTp6E8NKXoGsgKNl86RYoyodt+N/adUHo1xzBQ==","signature_status":"signed_v1","signed_at":"2026-07-03T01:17:23.067764Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.13732","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8b3d7f5d418060a74d6d8584973259637a462d6931d130da179d671dc543b1a6","sha256:d9a902fc828da2ae58c134ae1325c7b312c692fd74c6bac4685bd8c5e4f8d163"],"state_sha256":"30188216960308879a2d9bb067734c42d5a3dab01f7f45addf9c3d8ba32d46b4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uw5uWyAvaLxCSjL/eR3RBpWKfw+cqeoQ/roEr+VUEYAQHcQm5RE1Ki2j7HKsJx8eD4PaO9hpTVGjmllSI2RMAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T13:59:29.120895Z","bundle_sha256":"3917727be79bcbaf7d7666446110c1dd0f5bf85cc9865b70cd26454a1f41ea56"}}