{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:4LEBKC33PX4BS7IHX3D6C55VBZ","short_pith_number":"pith:4LEBKC33","schema_version":"1.0","canonical_sha256":"e2c8150b7b7df8197d07bec7e177b50e4436349b423872afbf996bded71b7ad9","source":{"kind":"arxiv","id":"1708.01286","version":1},"attestation_state":"computed","paper":{"title":"Metadata in the BioSample Online Repository are Impaired by Numerous Anomalies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"John Graybeal, Marcos Mart\\'inez-Romero, Mark A. Musen, Martin J. O'Connor, Rafael S. Gon\\c{c}alves","submitted_at":"2017-08-03T19:27:06Z","abstract_excerpt":"The metadata about scientific experiments are crucial for finding, reproducing, and reusing the data that the metadata describe. We present a study of the quality of the metadata stored in BioSample--a repository of metadata about samples used in biomedical experiments managed by the U.S. National Center for Biomedical Technology Information (NCBI). We tested whether 6.6 million BioSample metadata records are populated with values that fulfill the stated requirements for such values. Our study revealed multiple anomalies in the analyzed metadata. The BioSample metadata field names and their va"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1708.01286","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-08-03T19:27:06Z","cross_cats_sorted":[],"title_canon_sha256":"be97716ea81fcedb2601194838f0664f27f1a4bcc9b408a8b346fb53d91fc3f9","abstract_canon_sha256":"1d131189e1fe00fa2fa2dcc8ef65bcfefe763631a74ed91eee72862c0d0b389e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:38:37.796653Z","signature_b64":"kswuLBPGv2zV73E607gu192Y4harxmd1ahkjqgv71NjIeljxOj/riMIozEb0a1annu5L237rzf9AVkbbQ8w3BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e2c8150b7b7df8197d07bec7e177b50e4436349b423872afbf996bded71b7ad9","last_reissued_at":"2026-05-18T00:38:37.796197Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:38:37.796197Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Metadata in the BioSample Online Repository are Impaired by Numerous Anomalies","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"John Graybeal, Marcos Mart\\'inez-Romero, Mark A. Musen, Martin J. O'Connor, Rafael S. Gon\\c{c}alves","submitted_at":"2017-08-03T19:27:06Z","abstract_excerpt":"The metadata about scientific experiments are crucial for finding, reproducing, and reusing the data that the metadata describe. We present a study of the quality of the metadata stored in BioSample--a repository of metadata about samples used in biomedical experiments managed by the U.S. National Center for Biomedical Technology Information (NCBI). We tested whether 6.6 million BioSample metadata records are populated with values that fulfill the stated requirements for such values. Our study revealed multiple anomalies in the analyzed metadata. The BioSample metadata field names and their va"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.01286","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1708.01286","created_at":"2026-05-18T00:38:37.796268+00:00"},{"alias_kind":"arxiv_version","alias_value":"1708.01286v1","created_at":"2026-05-18T00:38:37.796268+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.01286","created_at":"2026-05-18T00:38:37.796268+00:00"},{"alias_kind":"pith_short_12","alias_value":"4LEBKC33PX4B","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_16","alias_value":"4LEBKC33PX4BS7IH","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_8","alias_value":"4LEBKC33","created_at":"2026-05-18T12:31:00.734936+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ","json":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ.json","graph_json":"https://pith.science/api/pith-number/4LEBKC33PX4BS7IHX3D6C55VBZ/graph.json","events_json":"https://pith.science/api/pith-number/4LEBKC33PX4BS7IHX3D6C55VBZ/events.json","paper":"https://pith.science/paper/4LEBKC33"},"agent_actions":{"view_html":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ","download_json":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ.json","view_paper":"https://pith.science/paper/4LEBKC33","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1708.01286&json=true","fetch_graph":"https://pith.science/api/pith-number/4LEBKC33PX4BS7IHX3D6C55VBZ/graph.json","fetch_events":"https://pith.science/api/pith-number/4LEBKC33PX4BS7IHX3D6C55VBZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ/action/storage_attestation","attest_author":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ/action/author_attestation","sign_citation":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ/action/citation_signature","submit_replication":"https://pith.science/pith/4LEBKC33PX4BS7IHX3D6C55VBZ/action/replication_record"}},"created_at":"2026-05-18T00:38:37.796268+00:00","updated_at":"2026-05-18T00:38:37.796268+00:00"}