{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:5KNBKHIA77J3IB5QJRABDY5PH7","short_pith_number":"pith:5KNBKHIA","schema_version":"1.0","canonical_sha256":"ea9a151d00ffd3b407b04c4011e3af3fd3d3df770cadff67f6fe3450c8ea3541","source":{"kind":"arxiv","id":"2512.02182","version":3},"attestation_state":"computed","paper":{"title":"Two-phase validation sampling via principal components to improve efficiency in multi-model estimation from error-prone biomedical databases","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.AP"],"primary_cat":"stat.ME","authors_text":"Cole Manschot, Sarah C. Lotspeich","submitted_at":"2025-12-01T20:22:34Z","abstract_excerpt":"Two-phase sampling offers a cost-effective way to validate error-prone covariate measurements in biomedical databases. Inexpensive or easy-to-obtain information is collected for the entire study in Phase I. Then, a subset of patients undergoes cost-intensive validation (e.g., expert chart review) to collect more accurate data in Phase II. When balancing primary and secondary analyses, competing models and priorities can result in poorly defined objectives for the most informative Phase II sampling criterion. Extreme tail sampling (ETS), wherein patients with the smallest and largest values of "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.02182","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ME","submitted_at":"2025-12-01T20:22:34Z","cross_cats_sorted":["stat.AP"],"title_canon_sha256":"b4ba8c88016f430fc9cfe8ee7b8329d79d9569edaef379a37a8f5cdbab12db5d","abstract_canon_sha256":"c0025addf96c3d21f44ac82ce3b275bd4b878dfaa1d93e07cdea7a7c37db6c14"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:19.378451Z","signature_b64":"aBOLF2L9ncUBz1aFiBQrFmi5aD/vayzIyGaikC5Vz/LZXSAp2tBf+l4JKFqzankgvk/XTyIzU+jkCGNCJBjIAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ea9a151d00ffd3b407b04c4011e3af3fd3d3df770cadff67f6fe3450c8ea3541","last_reissued_at":"2026-05-21T01:04:19.377104Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:19.377104Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Two-phase validation sampling via principal components to improve efficiency in multi-model estimation from error-prone biomedical databases","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.AP"],"primary_cat":"stat.ME","authors_text":"Cole Manschot, Sarah C. Lotspeich","submitted_at":"2025-12-01T20:22:34Z","abstract_excerpt":"Two-phase sampling offers a cost-effective way to validate error-prone covariate measurements in biomedical databases. Inexpensive or easy-to-obtain information is collected for the entire study in Phase I. Then, a subset of patients undergoes cost-intensive validation (e.g., expert chart review) to collect more accurate data in Phase II. When balancing primary and secondary analyses, competing models and priorities can result in poorly defined objectives for the most informative Phase II sampling criterion. Extreme tail sampling (ETS), wherein patients with the smallest and largest values of "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.02182","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.02182/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.02182","created_at":"2026-05-21T01:04:19.377225+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.02182v3","created_at":"2026-05-21T01:04:19.377225+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.02182","created_at":"2026-05-21T01:04:19.377225+00:00"},{"alias_kind":"pith_short_12","alias_value":"5KNBKHIA77J3","created_at":"2026-05-21T01:04:19.377225+00:00"},{"alias_kind":"pith_short_16","alias_value":"5KNBKHIA77J3IB5Q","created_at":"2026-05-21T01:04:19.377225+00:00"},{"alias_kind":"pith_short_8","alias_value":"5KNBKHIA","created_at":"2026-05-21T01:04:19.377225+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7","json":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7.json","graph_json":"https://pith.science/api/pith-number/5KNBKHIA77J3IB5QJRABDY5PH7/graph.json","events_json":"https://pith.science/api/pith-number/5KNBKHIA77J3IB5QJRABDY5PH7/events.json","paper":"https://pith.science/paper/5KNBKHIA"},"agent_actions":{"view_html":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7","download_json":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7.json","view_paper":"https://pith.science/paper/5KNBKHIA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.02182&json=true","fetch_graph":"https://pith.science/api/pith-number/5KNBKHIA77J3IB5QJRABDY5PH7/graph.json","fetch_events":"https://pith.science/api/pith-number/5KNBKHIA77J3IB5QJRABDY5PH7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7/action/storage_attestation","attest_author":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7/action/author_attestation","sign_citation":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7/action/citation_signature","submit_replication":"https://pith.science/pith/5KNBKHIA77J3IB5QJRABDY5PH7/action/replication_record"}},"created_at":"2026-05-21T01:04:19.377225+00:00","updated_at":"2026-05-21T01:04:19.377225+00:00"}