{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QPLYZBTWV6PXYYWWHYRMPT2JYR","short_pith_number":"pith:QPLYZBTW","canonical_record":{"source":{"id":"2606.21787","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-19T22:36:21Z","cross_cats_sorted":[],"title_canon_sha256":"20812b07e7ab88590ce9d7a16594ac0492f72e19c3c5c364d48b57c8a0e5d995","abstract_canon_sha256":"79797844cf20d77f41d6cfd11d982d0f0089f42491b1e173b7ce321ff42145d6"},"schema_version":"1.0"},"canonical_sha256":"83d78c8676af9f7c62d63e22c7cf49c450cd19aecc5904248147f88f18870e38","source":{"kind":"arxiv","id":"2606.21787","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21787","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21787v1","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21787","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_12","alias_value":"QPLYZBTWV6PX","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_16","alias_value":"QPLYZBTWV6PXYYWW","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_8","alias_value":"QPLYZBTW","created_at":"2026-06-23T01:13:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QPLYZBTWV6PXYYWWHYRMPT2JYR","target":"record","payload":{"canonical_record":{"source":{"id":"2606.21787","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-19T22:36:21Z","cross_cats_sorted":[],"title_canon_sha256":"20812b07e7ab88590ce9d7a16594ac0492f72e19c3c5c364d48b57c8a0e5d995","abstract_canon_sha256":"79797844cf20d77f41d6cfd11d982d0f0089f42491b1e173b7ce321ff42145d6"},"schema_version":"1.0"},"canonical_sha256":"83d78c8676af9f7c62d63e22c7cf49c450cd19aecc5904248147f88f18870e38","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:13:22.693445Z","signature_b64":"yEGQNKlCx9ll7/G8sXecujA/sMQjj5WD6/SE4yEFf1w3/n5JQUFfVWB6+0aKU2q5ocQLmDvnk/zX6g3QCMZmDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"83d78c8676af9f7c62d63e22c7cf49c450cd19aecc5904248147f88f18870e38","last_reissued_at":"2026-06-23T01:13:22.692918Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:13:22.692918Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.21787","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:13:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MMb+xmb813qpDeZhrCSKCOQXSiP/wIb6Sy2jGDcExZXTvyw2DFzYJmj1NlGU7QnuZFK2Iww/zG8BRtDA4sKTDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:29:17.460120Z"},"content_sha256":"7616b95dec4877a1d1380a5f436fcd6050ce0b22985f46066b85a0371a594bdb","schema_version":"1.0","event_id":"sha256:7616b95dec4877a1d1380a5f436fcd6050ce0b22985f46066b85a0371a594bdb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QPLYZBTWV6PXYYWWHYRMPT2JYR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards Imputation of Pre-Trained Language Model Metadata using Semantic Fingerprinting","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Adekunle Ajibode, Ahmed E. Hassan, Bram Adams, Keheliya Gallaba, Oussama Ben Sghaier","submitted_at":"2026-06-19T22:36:21Z","abstract_excerpt":"Pre-trained language models (PTLMs) hosted on platforms such as Hugging Face form complex lineage structures similar to software dependency graphs. However, unlike traditional software ecosystems, PTLM repositories often lack reliable provenance due to missing metadata, such as licenses, reuse methods, pipeline tags, model types, and training libraries. To address this gap, we introduce Semantic Fingerprinting (SemFin), a lightweight approach that combines Hugging Face (HF) configuration files with model repository tags to automatically impute missing model metadata fields and reconstruct mode"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21787","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21787/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:13:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"t7Ht9o5d0q74XcaPXxWXyx2RitAjE0sDFuvzLMQkqPiaAql38JEOyv2wCw8DvljZTTwY778nYTCeD1YAxKjOCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T05:29:17.460500Z"},"content_sha256":"00bd52bbb976fde543f8a05d31e9de02362ad0a849c949fb89ac3f4d31246e56","schema_version":"1.0","event_id":"sha256:00bd52bbb976fde543f8a05d31e9de02362ad0a849c949fb89ac3f4d31246e56"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/bundle.json","state_url":"https://pith.science/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T05:29:17Z","links":{"resolver":"https://pith.science/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR","bundle":"https://pith.science/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/bundle.json","state":"https://pith.science/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QPLYZBTWV6PXYYWWHYRMPT2JYR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QPLYZBTWV6PXYYWWHYRMPT2JYR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"79797844cf20d77f41d6cfd11d982d0f0089f42491b1e173b7ce321ff42145d6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-19T22:36:21Z","title_canon_sha256":"20812b07e7ab88590ce9d7a16594ac0492f72e19c3c5c364d48b57c8a0e5d995"},"schema_version":"1.0","source":{"id":"2606.21787","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21787","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21787v1","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21787","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_12","alias_value":"QPLYZBTWV6PX","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_16","alias_value":"QPLYZBTWV6PXYYWW","created_at":"2026-06-23T01:13:22Z"},{"alias_kind":"pith_short_8","alias_value":"QPLYZBTW","created_at":"2026-06-23T01:13:22Z"}],"graph_snapshots":[{"event_id":"sha256:00bd52bbb976fde543f8a05d31e9de02362ad0a849c949fb89ac3f4d31246e56","target":"graph","created_at":"2026-06-23T01:13:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.21787/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pre-trained language models (PTLMs) hosted on platforms such as Hugging Face form complex lineage structures similar to software dependency graphs. However, unlike traditional software ecosystems, PTLM repositories often lack reliable provenance due to missing metadata, such as licenses, reuse methods, pipeline tags, model types, and training libraries. To address this gap, we introduce Semantic Fingerprinting (SemFin), a lightweight approach that combines Hugging Face (HF) configuration files with model repository tags to automatically impute missing model metadata fields and reconstruct mode","authors_text":"Adekunle Ajibode, Ahmed E. Hassan, Bram Adams, Keheliya Gallaba, Oussama Ben Sghaier","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-19T22:36:21Z","title":"Towards Imputation of Pre-Trained Language Model Metadata using Semantic Fingerprinting"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21787","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7616b95dec4877a1d1380a5f436fcd6050ce0b22985f46066b85a0371a594bdb","target":"record","created_at":"2026-06-23T01:13:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"79797844cf20d77f41d6cfd11d982d0f0089f42491b1e173b7ce321ff42145d6","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-06-19T22:36:21Z","title_canon_sha256":"20812b07e7ab88590ce9d7a16594ac0492f72e19c3c5c364d48b57c8a0e5d995"},"schema_version":"1.0","source":{"id":"2606.21787","kind":"arxiv","version":1}},"canonical_sha256":"83d78c8676af9f7c62d63e22c7cf49c450cd19aecc5904248147f88f18870e38","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"83d78c8676af9f7c62d63e22c7cf49c450cd19aecc5904248147f88f18870e38","first_computed_at":"2026-06-23T01:13:22.692918Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T01:13:22.692918Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yEGQNKlCx9ll7/G8sXecujA/sMQjj5WD6/SE4yEFf1w3/n5JQUFfVWB6+0aKU2q5ocQLmDvnk/zX6g3QCMZmDA==","signature_status":"signed_v1","signed_at":"2026-06-23T01:13:22.693445Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.21787","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7616b95dec4877a1d1380a5f436fcd6050ce0b22985f46066b85a0371a594bdb","sha256:00bd52bbb976fde543f8a05d31e9de02362ad0a849c949fb89ac3f4d31246e56"],"state_sha256":"2b4f4cce88bb1ffdcf9f19855dfe34054d995325853cf100710e3f897e397b80"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HnKckGsJKLoNWTP8LdjgSvPeE7nK5RoorbTMAuPwRYWHVhCPNbORqSC3nTLhLNz4tmcA/2Ip14BLUElHjn+UAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T05:29:17.462898Z","bundle_sha256":"8370d737572dbe270875ce3aedf644f1f63a1a91b272ac3b8d237760161a84ec"}}