{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ROUR7ERENO2XTXMWINLRLJNL4D","short_pith_number":"pith:ROUR7ERE","canonical_record":{"source":{"id":"2606.28823","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-27T09:15:09Z","cross_cats_sorted":[],"title_canon_sha256":"6fc42e2c1def6f107d9b51fee7de4ff9b240127a0a5237457f2f4cbc9cc0dee0","abstract_canon_sha256":"3edeb68edce58bc3876b4524cc7ca41bcc405981d0f2723e0a444a0ed6db89dd"},"schema_version":"1.0"},"canonical_sha256":"8ba91f92246bb579dd96435715a5abe0dcd082c0d3e4583388f5c4b9fda8a230","source":{"kind":"arxiv","id":"2606.28823","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28823","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28823v1","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28823","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_12","alias_value":"ROUR7ERENO2X","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_16","alias_value":"ROUR7ERENO2XTXMW","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_8","alias_value":"ROUR7ERE","created_at":"2026-06-30T01:16:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ROUR7ERENO2XTXMWINLRLJNL4D","target":"record","payload":{"canonical_record":{"source":{"id":"2606.28823","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-27T09:15:09Z","cross_cats_sorted":[],"title_canon_sha256":"6fc42e2c1def6f107d9b51fee7de4ff9b240127a0a5237457f2f4cbc9cc0dee0","abstract_canon_sha256":"3edeb68edce58bc3876b4524cc7ca41bcc405981d0f2723e0a444a0ed6db89dd"},"schema_version":"1.0"},"canonical_sha256":"8ba91f92246bb579dd96435715a5abe0dcd082c0d3e4583388f5c4b9fda8a230","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-30T01:16:53.307816Z","signature_b64":"EP8XnRRM1wc04a/EseUQh0I2uvm/0jA9Le/gM7xWo2rmQmdQcn6M7BuYsuV9LN1dKs7Ez7rASweOb/RbeGAtDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8ba91f92246bb579dd96435715a5abe0dcd082c0d3e4583388f5c4b9fda8a230","last_reissued_at":"2026-06-30T01:16:53.307138Z","signature_status":"signed_v1","first_computed_at":"2026-06-30T01:16:53.307138Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.28823","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T01:16:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BI7vmbYrI9q1SdPlebXB6W6Q5WvMmcbr5n618yhx4ImkfhNCKHDv26Cz3SQlOZ4874IeByNI1q30IVIhiTIRAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T17:18:06.211835Z"},"content_sha256":"764f1eeb1807f7645c62a813a3af08bd48f05f47a002728ae6da8b7b09de3547","schema_version":"1.0","event_id":"sha256:764f1eeb1807f7645c62a813a3af08bd48f05f47a002728ae6da8b7b09de3547"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ROUR7ERENO2XTXMWINLRLJNL4D","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Labeling Training Data for Entity Matching Using Large Language Models","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Aaron Steiner, Christian Bizer","submitted_at":"2026-06-27T09:15:09Z","abstract_excerpt":"Recent large language models (LLMs) achieve strong performance on entity matching without requiring task-specific training data. However, applying these models to large sets of candidate pairs remains slow and costly. In contrast, entity matchers using traditional machine learning methods or small language models (SLMs), such as RoBERTa, offer much faster inference but require task-specific training data.\n  This paper investigates whether the need to provide task-specific training data can be avoided by using knowledge-distillation workflows, in which an LLM serves as a teacher model to label "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28823","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.28823/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-30T01:16:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rfl7RYwG9yj+dL34BBN0GkeHsF2ewVLSLs8lCSmNAfMgzX1pbwCyCcl54RRhzp81Js4gnEl0CHexYUSwdn95Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T17:18:06.212209Z"},"content_sha256":"6bf58cb1ccb3e479beedebca0938ea6d3fb6df4098751a124e6dec0717ebfdd8","schema_version":"1.0","event_id":"sha256:6bf58cb1ccb3e479beedebca0938ea6d3fb6df4098751a124e6dec0717ebfdd8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ROUR7ERENO2XTXMWINLRLJNL4D/bundle.json","state_url":"https://pith.science/pith/ROUR7ERENO2XTXMWINLRLJNL4D/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ROUR7ERENO2XTXMWINLRLJNL4D/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T17:18:06Z","links":{"resolver":"https://pith.science/pith/ROUR7ERENO2XTXMWINLRLJNL4D","bundle":"https://pith.science/pith/ROUR7ERENO2XTXMWINLRLJNL4D/bundle.json","state":"https://pith.science/pith/ROUR7ERENO2XTXMWINLRLJNL4D/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ROUR7ERENO2XTXMWINLRLJNL4D/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ROUR7ERENO2XTXMWINLRLJNL4D","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3edeb68edce58bc3876b4524cc7ca41bcc405981d0f2723e0a444a0ed6db89dd","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-27T09:15:09Z","title_canon_sha256":"6fc42e2c1def6f107d9b51fee7de4ff9b240127a0a5237457f2f4cbc9cc0dee0"},"schema_version":"1.0","source":{"id":"2606.28823","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.28823","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"arxiv_version","alias_value":"2606.28823v1","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.28823","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_12","alias_value":"ROUR7ERENO2X","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_16","alias_value":"ROUR7ERENO2XTXMW","created_at":"2026-06-30T01:16:53Z"},{"alias_kind":"pith_short_8","alias_value":"ROUR7ERE","created_at":"2026-06-30T01:16:53Z"}],"graph_snapshots":[{"event_id":"sha256:6bf58cb1ccb3e479beedebca0938ea6d3fb6df4098751a124e6dec0717ebfdd8","target":"graph","created_at":"2026-06-30T01:16:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.28823/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent large language models (LLMs) achieve strong performance on entity matching without requiring task-specific training data. However, applying these models to large sets of candidate pairs remains slow and costly. In contrast, entity matchers using traditional machine learning methods or small language models (SLMs), such as RoBERTa, offer much faster inference but require task-specific training data.\n  This paper investigates whether the need to provide task-specific training data can be avoided by using knowledge-distillation workflows, in which an LLM serves as a teacher model to label ","authors_text":"Aaron Steiner, Christian Bizer","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-27T09:15:09Z","title":"Labeling Training Data for Entity Matching Using Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.28823","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:764f1eeb1807f7645c62a813a3af08bd48f05f47a002728ae6da8b7b09de3547","target":"record","created_at":"2026-06-30T01:16:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3edeb68edce58bc3876b4524cc7ca41bcc405981d0f2723e0a444a0ed6db89dd","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-27T09:15:09Z","title_canon_sha256":"6fc42e2c1def6f107d9b51fee7de4ff9b240127a0a5237457f2f4cbc9cc0dee0"},"schema_version":"1.0","source":{"id":"2606.28823","kind":"arxiv","version":1}},"canonical_sha256":"8ba91f92246bb579dd96435715a5abe0dcd082c0d3e4583388f5c4b9fda8a230","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8ba91f92246bb579dd96435715a5abe0dcd082c0d3e4583388f5c4b9fda8a230","first_computed_at":"2026-06-30T01:16:53.307138Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-30T01:16:53.307138Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EP8XnRRM1wc04a/EseUQh0I2uvm/0jA9Le/gM7xWo2rmQmdQcn6M7BuYsuV9LN1dKs7Ez7rASweOb/RbeGAtDA==","signature_status":"signed_v1","signed_at":"2026-06-30T01:16:53.307816Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.28823","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:764f1eeb1807f7645c62a813a3af08bd48f05f47a002728ae6da8b7b09de3547","sha256:6bf58cb1ccb3e479beedebca0938ea6d3fb6df4098751a124e6dec0717ebfdd8"],"state_sha256":"31c6d37ef0f24e6938c5ac64b88004a7065b049fe72b8c408dcaac016145dca0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qNlrsnN4DRtnJi5fvB2nLZ4wTid2Qa1QwRs4GennWaWTZf1B/J3WUuoRf+6MRBm4aQgn5dyQmT6QsXG0LIawBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T17:18:06.214297Z","bundle_sha256":"68fe103940c6f327cfc4512410aee6baf2cca884e5be8b483f8ed614e3073f86"}}