{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PI5MLL6K24L6XEWLSGJAM46RFS","short_pith_number":"pith:PI5MLL6K","canonical_record":{"source":{"id":"2604.08552","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-10T18:47:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ba5d3cc5ac20f7f39f8a6d8e2cd81f2bf71d1fe99cddda9ca888fef75df27e13","abstract_canon_sha256":"16e18b1c4c8b586ade2a62f68125a74dc64a80f4094857de5e530974db2b67b3"},"schema_version":"1.0"},"canonical_sha256":"7a3ac5afcad717eb92cb91920673d12cb4be95a68f874be6e9dace9626a96a31","source":{"kind":"arxiv","id":"2604.08552","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.08552","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"arxiv_version","alias_value":"2604.08552v2","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.08552","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_12","alias_value":"PI5MLL6K24L6","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_16","alias_value":"PI5MLL6K24L6XEWL","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_8","alias_value":"PI5MLL6K","created_at":"2026-06-19T16:12:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PI5MLL6K24L6XEWLSGJAM46RFS","target":"record","payload":{"canonical_record":{"source":{"id":"2604.08552","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-10T18:47:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ba5d3cc5ac20f7f39f8a6d8e2cd81f2bf71d1fe99cddda9ca888fef75df27e13","abstract_canon_sha256":"16e18b1c4c8b586ade2a62f68125a74dc64a80f4094857de5e530974db2b67b3"},"schema_version":"1.0"},"canonical_sha256":"7a3ac5afcad717eb92cb91920673d12cb4be95a68f874be6e9dace9626a96a31","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:53.938915Z","signature_b64":"Fv+2raCn8fYQ+V6IQE7cD0IEaIJQH/IHDOh+CqwNKNVjJCOtNsulEZ4xA5NvLljDy0VraJwwxsTOAZl3Pg/RDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7a3ac5afcad717eb92cb91920673d12cb4be95a68f874be6e9dace9626a96a31","last_reissued_at":"2026-06-19T16:12:53.938509Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:53.938509Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.08552","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TU6Z1eCtW+ZKuxs9pc2LDh1FDUIrDzI/epOtjcpoHJEaQM7Zdqle5yD9Cx1nWbRXPRuYiNgC4eSor4HxnNCiAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-22T08:28:30.199492Z"},"content_sha256":"cf1e277adb80882093010a413c7e8ed62f8055f54a603f2cd45c3f1bf86ac1c6","schema_version":"1.0","event_id":"sha256:cf1e277adb80882093010a413c7e8ed62f8055f54a603f2cd45c3f1bf86ac1c6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PI5MLL6K24L6XEWLSGJAM46RFS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Automated Standardization of Legacy Biomedical Metadata Using an Ontology-Constrained LLM Agent","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone.","cross_cats":["cs.AI"],"primary_cat":"cs.DB","authors_text":"Jean G. Rosario, Josef Hardi, Marcos Martinez-Romero, Mark A. Musen, Martin J. O'Connor, Stephen A. Fisher","submitted_at":"2026-03-10T18:47:30Z","abstract_excerpt":"Scientific metadata are often incomplete and noncompliant with community standards, limiting dataset findability, interoperability, and reuse. Even when standard metadata reporting guidelines exist, they typically lack machine-actionable representations. Producing FAIR datasets requires encoding metadata standards as machine-actionable templates with rich field specifications and precise value constraints. Recent work has shown that LLMs guided by field names and ontology constraints can improve metadata standardization, but these approaches treat constraints as static text prompts, relying on"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"augmenting the LLM with real-time tool access consistently improves prediction accuracy over the LLM alone across both ontology-constrained and non-ontology-constrained fields","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The expert-curated gold standard is treated as ground truth and the real-time terminology services always return canonically correct terms without introducing new errors.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"An LLM system with real-time access to ontology services outperforms plain LLM prompting on standardizing 839 HuBMAP legacy metadata records against an expert gold standard.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f48d1e34761c9c8d135295a884ad48045aa710e4905786b4c3748182ba323a23"},"source":{"id":"2604.08552","kind":"arxiv","version":2},"verdict":{"id":"b6d03506-3e95-40a9-a106-80b9728ff1f8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T12:37:55.673155Z","strongest_claim":"augmenting the LLM with real-time tool access consistently improves prediction accuracy over the LLM alone across both ontology-constrained and non-ontology-constrained fields","one_line_summary":"An LLM system with real-time access to ontology services outperforms plain LLM prompting on standardizing 839 HuBMAP legacy metadata records against an expert gold standard.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The expert-curated gold standard is treated as ground truth and the real-time terminology services always return canonically correct terms without introducing new errors.","pith_extraction_headline":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.08552/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"b6d03506-3e95-40a9-a106-80b9728ff1f8"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:12:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/g1EhXMN7NoiOuoAnD3KknGqik2GxneX5vIDD2lFwxDsMPw5Yc4psmA7w+h41HhVUAaiAe/MzusoqyUodxo5BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-22T08:28:30.199975Z"},"content_sha256":"f691bb47c838671d87d63ff35a7d1759ac48586922a9febda306d3479d504568","schema_version":"1.0","event_id":"sha256:f691bb47c838671d87d63ff35a7d1759ac48586922a9febda306d3479d504568"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PI5MLL6K24L6XEWLSGJAM46RFS/bundle.json","state_url":"https://pith.science/pith/PI5MLL6K24L6XEWLSGJAM46RFS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PI5MLL6K24L6XEWLSGJAM46RFS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-22T08:28:30Z","links":{"resolver":"https://pith.science/pith/PI5MLL6K24L6XEWLSGJAM46RFS","bundle":"https://pith.science/pith/PI5MLL6K24L6XEWLSGJAM46RFS/bundle.json","state":"https://pith.science/pith/PI5MLL6K24L6XEWLSGJAM46RFS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PI5MLL6K24L6XEWLSGJAM46RFS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PI5MLL6K24L6XEWLSGJAM46RFS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"16e18b1c4c8b586ade2a62f68125a74dc64a80f4094857de5e530974db2b67b3","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-10T18:47:30Z","title_canon_sha256":"ba5d3cc5ac20f7f39f8a6d8e2cd81f2bf71d1fe99cddda9ca888fef75df27e13"},"schema_version":"1.0","source":{"id":"2604.08552","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.08552","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"arxiv_version","alias_value":"2604.08552v2","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.08552","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_12","alias_value":"PI5MLL6K24L6","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_16","alias_value":"PI5MLL6K24L6XEWL","created_at":"2026-06-19T16:12:53Z"},{"alias_kind":"pith_short_8","alias_value":"PI5MLL6K","created_at":"2026-06-19T16:12:53Z"}],"graph_snapshots":[{"event_id":"sha256:f691bb47c838671d87d63ff35a7d1759ac48586922a9febda306d3479d504568","target":"graph","created_at":"2026-06-19T16:12:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"augmenting the LLM with real-time tool access consistently improves prediction accuracy over the LLM alone across both ontology-constrained and non-ontology-constrained fields"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The expert-curated gold standard is treated as ground truth and the real-time terminology services always return canonically correct terms without introducing new errors."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"An LLM system with real-time access to ontology services outperforms plain LLM prompting on standardizing 839 HuBMAP legacy metadata records against an expert gold standard."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone."}],"snapshot_sha256":"f48d1e34761c9c8d135295a884ad48045aa710e4905786b4c3748182ba323a23"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.08552/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Scientific metadata are often incomplete and noncompliant with community standards, limiting dataset findability, interoperability, and reuse. Even when standard metadata reporting guidelines exist, they typically lack machine-actionable representations. Producing FAIR datasets requires encoding metadata standards as machine-actionable templates with rich field specifications and precise value constraints. Recent work has shown that LLMs guided by field names and ontology constraints can improve metadata standardization, but these approaches treat constraints as static text prompts, relying on","authors_text":"Jean G. Rosario, Josef Hardi, Marcos Martinez-Romero, Mark A. Musen, Martin J. O'Connor, Stephen A. Fisher","cross_cats":["cs.AI"],"headline":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-10T18:47:30Z","title":"Automated Standardization of Legacy Biomedical Metadata Using an Ontology-Constrained LLM Agent"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.08552","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T12:37:55.673155Z","id":"b6d03506-3e95-40a9-a106-80b9728ff1f8","model_set":{"reader":"grok-4.3"},"one_line_summary":"An LLM system with real-time access to ontology services outperforms plain LLM prompting on standardizing 839 HuBMAP legacy metadata records against an expert gold standard.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Augmenting an LLM with real-time queries to biomedical terminology services improves metadata standardization accuracy over the model alone.","strongest_claim":"augmenting the LLM with real-time tool access consistently improves prediction accuracy over the LLM alone across both ontology-constrained and non-ontology-constrained fields","weakest_assumption":"The expert-curated gold standard is treated as ground truth and the real-time terminology services always return canonically correct terms without introducing new errors."}},"verdict_id":"b6d03506-3e95-40a9-a106-80b9728ff1f8"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cf1e277adb80882093010a413c7e8ed62f8055f54a603f2cd45c3f1bf86ac1c6","target":"record","created_at":"2026-06-19T16:12:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"16e18b1c4c8b586ade2a62f68125a74dc64a80f4094857de5e530974db2b67b3","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2026-03-10T18:47:30Z","title_canon_sha256":"ba5d3cc5ac20f7f39f8a6d8e2cd81f2bf71d1fe99cddda9ca888fef75df27e13"},"schema_version":"1.0","source":{"id":"2604.08552","kind":"arxiv","version":2}},"canonical_sha256":"7a3ac5afcad717eb92cb91920673d12cb4be95a68f874be6e9dace9626a96a31","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7a3ac5afcad717eb92cb91920673d12cb4be95a68f874be6e9dace9626a96a31","first_computed_at":"2026-06-19T16:12:53.938509Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:12:53.938509Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Fv+2raCn8fYQ+V6IQE7cD0IEaIJQH/IHDOh+CqwNKNVjJCOtNsulEZ4xA5NvLljDy0VraJwwxsTOAZl3Pg/RDw==","signature_status":"signed_v1","signed_at":"2026-06-19T16:12:53.938915Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.08552","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cf1e277adb80882093010a413c7e8ed62f8055f54a603f2cd45c3f1bf86ac1c6","sha256:f691bb47c838671d87d63ff35a7d1759ac48586922a9febda306d3479d504568"],"state_sha256":"028f497a6676fff95b23f12b26c6f6adb7bbe00862d3d028c53caeab811906f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qN/UV/NMJJOqH1UL7UPEFHW20HK9/UmGsvtUWVyl+H0D74eJBhkHVIUiN4UFiEy0T3pmiBD+4V0nEuFABxuABw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-22T08:28:30.202279Z","bundle_sha256":"5b59d95c8229432b382a47767090c15d627b6c12b80f39868a12b4591c505bb3"}}