{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:5NWYXOMOUGIRLZLH7IH2XRS34P","short_pith_number":"pith:5NWYXOMO","canonical_record":{"source":{"id":"2605.15079","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T17:04:39Z","cross_cats_sorted":["cs.DB","cs.DL","cs.IR"],"title_canon_sha256":"05e6cca40cfa097227ceb856ef192b40b703aba4da0645fd29d2c23873e7a805","abstract_canon_sha256":"a3fe3d2da2a53353118bb6852b8ae2c08fb22c9facc721bfcc75c54d4a62f6ff"},"schema_version":"1.0"},"canonical_sha256":"eb6d8bb98ea19115e567fa0fabc65be3c5489568cb40383b0829aa80c8c3cd36","source":{"kind":"arxiv","id":"2605.15079","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15079","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15079v1","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15079","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"pith_short_12","alias_value":"5NWYXOMOUGIR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"5NWYXOMOUGIRLZLH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"5NWYXOMO","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:5NWYXOMOUGIRLZLH7IH2XRS34P","target":"record","payload":{"canonical_record":{"source":{"id":"2605.15079","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T17:04:39Z","cross_cats_sorted":["cs.DB","cs.DL","cs.IR"],"title_canon_sha256":"05e6cca40cfa097227ceb856ef192b40b703aba4da0645fd29d2c23873e7a805","abstract_canon_sha256":"a3fe3d2da2a53353118bb6852b8ae2c08fb22c9facc721bfcc75c54d4a62f6ff"},"schema_version":"1.0"},"canonical_sha256":"eb6d8bb98ea19115e567fa0fabc65be3c5489568cb40383b0829aa80c8c3cd36","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:54.163296Z","signature_b64":"TZPjFIDKlT/8xzN+jkwphuWKSQXgBsiSz89ylo5BPO/yd7155UhxSSQyLd8mm19PEgCqfo7o+yeoVNY2NSHsBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eb6d8bb98ea19115e567fa0fabc65be3c5489568cb40383b0829aa80c8c3cd36","last_reissued_at":"2026-05-17T23:38:54.162577Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:54.162577Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.15079","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DlkAJXPXDOMtRFM5adJuDUHV0vHKZTYDeuxxcpOErVMd41ldkgnQOiHW1wkmtTr0NJmZX99lVSyJgHX+5ByxDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T09:11:30.648444Z"},"content_sha256":"5fc91567a34102c09c76ec80d6c9701b7578c88c48b3979f87a313d414360a54","schema_version":"1.0","event_id":"sha256:5fc91567a34102c09c76ec80d6c9701b7578c88c48b3979f87a313d414360a54"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:5NWYXOMOUGIRLZLH7IH2XRS34P","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Croissant Baker: Metadata Generation for Discoverable, Governable, and Reusable ML Datasets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB","cs.DL","cs.IR"],"primary_cat":"cs.LG","authors_text":"Anwai Archit, Christina Conrad Parry, Debanshu Das, Eric S. Rosenthal, Joan Giner-Miguelez, Joaquin Vanschoren, Lara Grosso, Luis Oala, Marzyeh Ghassemi, Matthew McDermott, Nobin Sarwar, Rafi Al Attrach, Rajat Ghosh, Rajna Fani, Sebastian Lobentanzer, Steffen Vogler, Sujata Goswami, Surbhi Motghare, Tom Pollard, Varuni H. K.","submitted_at":"2026-05-14T17:04:39Z","abstract_excerpt":"Croissant has emerged as the metadata standard for machine learning datasets, providing a structured, JSON-LD-based format that makes dataset discovery, automated ingestion, and reproducible analysis machine-checkable across ML platforms. Adoption has accelerated, and NeurIPS now requires Croissant metadata in every submission to its dataset tracks. Yet in practice Croissant generation usually starts with uploading data to a public platform, a path infeasible for governed and large local repositories that hold much of the high-value data ML increasingly relies on. We release Croissant Baker, a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.15079","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"d0/0PZjurX5wPcavVMwFC7zDCV0Jqs6he1ig4ZUPDJUBFUs6v//ddxPQqxDZSgTYe+YzPI72lVA/A565gTgTAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T09:11:30.649128Z"},"content_sha256":"bf6edc8360d7a0c5706957f9dd896dba6b1755a74b937edfa5d8d04ba927744e","schema_version":"1.0","event_id":"sha256:bf6edc8360d7a0c5706957f9dd896dba6b1755a74b937edfa5d8d04ba927744e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/bundle.json","state_url":"https://pith.science/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T09:11:30Z","links":{"resolver":"https://pith.science/pith/5NWYXOMOUGIRLZLH7IH2XRS34P","bundle":"https://pith.science/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/bundle.json","state":"https://pith.science/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5NWYXOMOUGIRLZLH7IH2XRS34P/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5NWYXOMOUGIRLZLH7IH2XRS34P","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a3fe3d2da2a53353118bb6852b8ae2c08fb22c9facc721bfcc75c54d4a62f6ff","cross_cats_sorted":["cs.DB","cs.DL","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T17:04:39Z","title_canon_sha256":"05e6cca40cfa097227ceb856ef192b40b703aba4da0645fd29d2c23873e7a805"},"schema_version":"1.0","source":{"id":"2605.15079","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15079","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15079v1","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15079","created_at":"2026-05-17T23:38:54Z"},{"alias_kind":"pith_short_12","alias_value":"5NWYXOMOUGIR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"5NWYXOMOUGIRLZLH","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"5NWYXOMO","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:bf6edc8360d7a0c5706957f9dd896dba6b1755a74b937edfa5d8d04ba927744e","target":"graph","created_at":"2026-05-17T23:38:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Croissant has emerged as the metadata standard for machine learning datasets, providing a structured, JSON-LD-based format that makes dataset discovery, automated ingestion, and reproducible analysis machine-checkable across ML platforms. Adoption has accelerated, and NeurIPS now requires Croissant metadata in every submission to its dataset tracks. Yet in practice Croissant generation usually starts with uploading data to a public platform, a path infeasible for governed and large local repositories that hold much of the high-value data ML increasingly relies on. We release Croissant Baker, a","authors_text":"Anwai Archit, Christina Conrad Parry, Debanshu Das, Eric S. Rosenthal, Joan Giner-Miguelez, Joaquin Vanschoren, Lara Grosso, Luis Oala, Marzyeh Ghassemi, Matthew McDermott, Nobin Sarwar, Rafi Al Attrach, Rajat Ghosh, Rajna Fani, Sebastian Lobentanzer, Steffen Vogler, Sujata Goswami, Surbhi Motghare, Tom Pollard, Varuni H. K.","cross_cats":["cs.DB","cs.DL","cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T17:04:39Z","title":"Croissant Baker: Metadata Generation for Discoverable, Governable, and Reusable ML Datasets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.15079","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5fc91567a34102c09c76ec80d6c9701b7578c88c48b3979f87a313d414360a54","target":"record","created_at":"2026-05-17T23:38:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a3fe3d2da2a53353118bb6852b8ae2c08fb22c9facc721bfcc75c54d4a62f6ff","cross_cats_sorted":["cs.DB","cs.DL","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T17:04:39Z","title_canon_sha256":"05e6cca40cfa097227ceb856ef192b40b703aba4da0645fd29d2c23873e7a805"},"schema_version":"1.0","source":{"id":"2605.15079","kind":"arxiv","version":1}},"canonical_sha256":"eb6d8bb98ea19115e567fa0fabc65be3c5489568cb40383b0829aa80c8c3cd36","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"eb6d8bb98ea19115e567fa0fabc65be3c5489568cb40383b0829aa80c8c3cd36","first_computed_at":"2026-05-17T23:38:54.162577Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:54.162577Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TZPjFIDKlT/8xzN+jkwphuWKSQXgBsiSz89ylo5BPO/yd7155UhxSSQyLd8mm19PEgCqfo7o+yeoVNY2NSHsBQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:54.163296Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.15079","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5fc91567a34102c09c76ec80d6c9701b7578c88c48b3979f87a313d414360a54","sha256:bf6edc8360d7a0c5706957f9dd896dba6b1755a74b937edfa5d8d04ba927744e"],"state_sha256":"c2d766d6f3d721e6d180063ac00c63b52ea0a3ea761cd525f597d128820f6280"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ln3ZjvInRJCdLnQTcC95stxFw2HS/JZSDw6Pbb+LNw/leop6RBkkBKE0pG+g9O+RJAg5pHbgomoy6iXy2UqQDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T09:11:30.652486Z","bundle_sha256":"5f00b58d5239dbbd8bce7f4e73dd238dc651f49f485257b0eb5574812b9ec827"}}