{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:HI4WZJMHZOGUHEVOPWNJJ5UA36","short_pith_number":"pith:HI4WZJMH","canonical_record":{"source":{"id":"2605.30729","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:45:45Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"58d4b7912c3f289727d40cfc7e80deebaf0fbd0c3b1e487a1d38000a4f22d997","abstract_canon_sha256":"dc1982548b92b01804f0a9d0dd3e277fafac5ea21fc7a57b79495320ff500f34"},"schema_version":"1.0"},"canonical_sha256":"3a396ca587cb8d4392ae7d9a94f680dfbf760b37425c069c8d1b4bce4d1e3f9e","source":{"kind":"arxiv","id":"2605.30729","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30729","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30729v1","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30729","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_12","alias_value":"HI4WZJMHZOGU","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_16","alias_value":"HI4WZJMHZOGUHEVO","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_8","alias_value":"HI4WZJMH","created_at":"2026-06-01T01:03:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:HI4WZJMHZOGUHEVOPWNJJ5UA36","target":"record","payload":{"canonical_record":{"source":{"id":"2605.30729","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:45:45Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"58d4b7912c3f289727d40cfc7e80deebaf0fbd0c3b1e487a1d38000a4f22d997","abstract_canon_sha256":"dc1982548b92b01804f0a9d0dd3e277fafac5ea21fc7a57b79495320ff500f34"},"schema_version":"1.0"},"canonical_sha256":"3a396ca587cb8d4392ae7d9a94f680dfbf760b37425c069c8d1b4bce4d1e3f9e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:03:12.763950Z","signature_b64":"r6umApIwhFrMdUYU9/BnLBgx10zPsTG+87gFNSUM3LRMSzGw2oK9yUtvJ+ZQtveFimueUtp0i4cr+xyg6nmHAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3a396ca587cb8d4392ae7d9a94f680dfbf760b37425c069c8d1b4bce4d1e3f9e","last_reissued_at":"2026-06-01T01:03:12.762889Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:03:12.762889Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.30729","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WTDtOhFG/a8/sRYsKv3P3KXYswlP5NnUdRro2DasOCM/Nnipv4GT4cG3TjbnekMyRpviDzdQEjJgKfU50QOKAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T00:29:26.617054Z"},"content_sha256":"06558c9492e4b719f8d6369e8c00dee7cf94b14612975b51a5315183407bbe86","schema_version":"1.0","event_id":"sha256:06558c9492e4b719f8d6369e8c00dee7cf94b14612975b51a5315183407bbe86"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:HI4WZJMHZOGUHEVOPWNJJ5UA36","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SemStruct: Contextualizing Semantic Embeddings with Structural Information for Schema Matching","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.LG","authors_text":"Horst Samulowitz, Inwon Kang, Kavitha Srinivas, Nandana Mihindukulasooriya, Oshani Seneviratne, Parikshit Ram, Sola Shirai","submitted_at":"2026-05-29T01:45:45Z","abstract_excerpt":"Schema matching is a fundamental step in integrating heterogeneous data sources. While Pre-trained Language Models (PLMs) have revolutionized this task by capturing linguistic semantics, they typically process tabular data as serialized text sequences of standalone column descriptions. This serialization discards critical structural information -- specifically, the row-level co-occurrences, i.e. the relational context -- forcing models to rely solely on column header semantics or standalone distributions. To bridge this gap, we propose SemStruct, a framework that joins the semantic power of fr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30729","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30729/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SAapkevtr8C+9Z3+Xi0H1/1VCQ3BGn0HW8BeCdaaGTuARnxBGUikumPXRkM7TIBbYjWR8YW+KHv1FrjZqmq/Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T00:29:26.617442Z"},"content_sha256":"e4078f5125280fe5e398ec0e313c047b6b3c89102134bb35aa6b6aa7c7d29d7f","schema_version":"1.0","event_id":"sha256:e4078f5125280fe5e398ec0e313c047b6b3c89102134bb35aa6b6aa7c7d29d7f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/bundle.json","state_url":"https://pith.science/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T00:29:26Z","links":{"resolver":"https://pith.science/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36","bundle":"https://pith.science/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/bundle.json","state":"https://pith.science/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HI4WZJMHZOGUHEVOPWNJJ5UA36/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HI4WZJMHZOGUHEVOPWNJJ5UA36","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"dc1982548b92b01804f0a9d0dd3e277fafac5ea21fc7a57b79495320ff500f34","cross_cats_sorted":["cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:45:45Z","title_canon_sha256":"58d4b7912c3f289727d40cfc7e80deebaf0fbd0c3b1e487a1d38000a4f22d997"},"schema_version":"1.0","source":{"id":"2605.30729","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.30729","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"arxiv_version","alias_value":"2605.30729v1","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30729","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_12","alias_value":"HI4WZJMHZOGU","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_16","alias_value":"HI4WZJMHZOGUHEVO","created_at":"2026-06-01T01:03:12Z"},{"alias_kind":"pith_short_8","alias_value":"HI4WZJMH","created_at":"2026-06-01T01:03:12Z"}],"graph_snapshots":[{"event_id":"sha256:e4078f5125280fe5e398ec0e313c047b6b3c89102134bb35aa6b6aa7c7d29d7f","target":"graph","created_at":"2026-06-01T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.30729/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Schema matching is a fundamental step in integrating heterogeneous data sources. While Pre-trained Language Models (PLMs) have revolutionized this task by capturing linguistic semantics, they typically process tabular data as serialized text sequences of standalone column descriptions. This serialization discards critical structural information -- specifically, the row-level co-occurrences, i.e. the relational context -- forcing models to rely solely on column header semantics or standalone distributions. To bridge this gap, we propose SemStruct, a framework that joins the semantic power of fr","authors_text":"Horst Samulowitz, Inwon Kang, Kavitha Srinivas, Nandana Mihindukulasooriya, Oshani Seneviratne, Parikshit Ram, Sola Shirai","cross_cats":["cs.IR"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:45:45Z","title":"SemStruct: Contextualizing Semantic Embeddings with Structural Information for Schema Matching"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30729","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:06558c9492e4b719f8d6369e8c00dee7cf94b14612975b51a5315183407bbe86","target":"record","created_at":"2026-06-01T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"dc1982548b92b01804f0a9d0dd3e277fafac5ea21fc7a57b79495320ff500f34","cross_cats_sorted":["cs.IR"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T01:45:45Z","title_canon_sha256":"58d4b7912c3f289727d40cfc7e80deebaf0fbd0c3b1e487a1d38000a4f22d997"},"schema_version":"1.0","source":{"id":"2605.30729","kind":"arxiv","version":1}},"canonical_sha256":"3a396ca587cb8d4392ae7d9a94f680dfbf760b37425c069c8d1b4bce4d1e3f9e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3a396ca587cb8d4392ae7d9a94f680dfbf760b37425c069c8d1b4bce4d1e3f9e","first_computed_at":"2026-06-01T01:03:12.762889Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:12.762889Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"r6umApIwhFrMdUYU9/BnLBgx10zPsTG+87gFNSUM3LRMSzGw2oK9yUtvJ+ZQtveFimueUtp0i4cr+xyg6nmHAA==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:12.763950Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.30729","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:06558c9492e4b719f8d6369e8c00dee7cf94b14612975b51a5315183407bbe86","sha256:e4078f5125280fe5e398ec0e313c047b6b3c89102134bb35aa6b6aa7c7d29d7f"],"state_sha256":"5a1146dfce34d37f7139320260e8760f2102119bec253a545378d9af8e7bbc09"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6+5wXuigAkhNKdGJCCklWUBTNF0FP6aSHRhDd8LwHThDStoANb/sDI2vhbFFiWX4REcanPPGSUP3HMasrIRBAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T00:29:26.619461Z","bundle_sha256":"62066d1d16a691f1c2afaba0e9d63c4372308b57c60ce4fda63efe856eb540ae"}}