{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","short_pith_number":"pith:KDNHC5QK","canonical_record":{"source":{"id":"2605.20500","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-19T21:12:36Z","cross_cats_sorted":[],"title_canon_sha256":"7701ef376910fe924b41101b31cd4e444fe0bf5ce1e38736234f0bf6e1dec8b6","abstract_canon_sha256":"42d95a0dc8360b1178851d29f1b18c2a1fc98abb32be079ff198534386e560e8"},"schema_version":"1.0"},"canonical_sha256":"50da71760a8dc7b894dc4cdcb3e119dd32498fc73de778f9446ea3fc574756e0","source":{"kind":"arxiv","id":"2605.20500","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20500","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20500v1","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20500","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"KDNHC5QKRXD3","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"KDNHC5QKRXD3RFG4","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"KDNHC5QK","created_at":"2026-05-21T01:04:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","target":"record","payload":{"canonical_record":{"source":{"id":"2605.20500","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-19T21:12:36Z","cross_cats_sorted":[],"title_canon_sha256":"7701ef376910fe924b41101b31cd4e444fe0bf5ce1e38736234f0bf6e1dec8b6","abstract_canon_sha256":"42d95a0dc8360b1178851d29f1b18c2a1fc98abb32be079ff198534386e560e8"},"schema_version":"1.0"},"canonical_sha256":"50da71760a8dc7b894dc4cdcb3e119dd32498fc73de778f9446ea3fc574756e0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:39.954625Z","signature_b64":"QIOrBIdIrYVnwvxsKhJVAp2icmr5chSjEM+GDvLUgKc6Td6htow1/5+N4abr7xFNs2KOUwAtw8hpb7uM1twsBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"50da71760a8dc7b894dc4cdcb3e119dd32498fc73de778f9446ea3fc574756e0","last_reissued_at":"2026-05-21T01:04:39.954194Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:39.954194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.20500","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"orzd4xBe7ed+DnOofWghfZPiGSZq2wGSW5ymtftdRmJGHDfqWIYVUmQ/AAVmxhvwYObENflKccWWI/uaUJD9DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:26:23.948352Z"},"content_sha256":"08ca7f7c8906ae95c25de5cea7701d6e5a47cf84ae15ccf0825963cf96d97d66","schema_version":"1.0","event_id":"sha256:08ca7f7c8906ae95c25de5cea7701d6e5a47cf84ae15ccf0825963cf96d97d66"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Multi-Layer Testing Framework for Automated Data Quality Assurance in Cloud-Native ELT Pipelines","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Hassan Reza, Ismail Gargouri","submitted_at":"2026-05-19T21:12:36Z","abstract_excerpt":"Ensuring data quality in cloud-native Extract-Load-Transform (ELT) pipelines is increasingly challenging due to heterogeneous data sources, evolving schemas, and multi-backend execution environments. This paper presents a unified, multi-layer testing framework that integrates orchestration-level validation, declarative dbt tests, large language model (LLM)-generated semantic tests, and cross-store consistency checking between DuckDB and Snowflake, orchestrated through Apache Airflow. Controlled anomaly-injection experiments demonstrate that a manual-only baseline detected 7 of 16 injected anom"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20500","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20500/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AUD7f/H5LEzGyDCUjiGjbAO/MQ/1Tu+iTQUSZhfw1NW/Guin1yfg/FsZSacP2EAm7dcQ388UqVmJvRSrbg+qCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:26:23.949039Z"},"content_sha256":"41dbbf4690df75004f29c585fa81af4498762466ed5ebf0579f050d1950ce0b6","schema_version":"1.0","event_id":"sha256:41dbbf4690df75004f29c585fa81af4498762466ed5ebf0579f050d1950ce0b6"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","target":"integrity","payload":{"note":"Identifier '10.1109/access.2024.3353678' is syntactically valid but the DOI registry (doi.org) returned 404, and Crossref / OpenAlex / internal corpus also have no record. The cited work could not be located through any authoritative source.","snippet":"Ridzuan, N., Idrus, M., Mahdin, H.: A review of data -quality dimensions for big data. IEEE Access 12, 11258 –11275 (2024). https://doi.org/10.1109/ACCESS.2024.3353678","arxiv_id":"2605.20500","detector":"doi_compliance","evidence":{"doi":"10.1109/access.2024.3353678","arxiv_id":null,"ref_index":11,"raw_excerpt":"Ridzuan, N., Idrus, M., Mahdin, H.: A review of data -quality dimensions for big data. IEEE Access 12, 11258 –11275 (2024). https://doi.org/10.1109/ACCESS.2024.3353678","verdict_class":"cross_source","checked_sources":["crossref_by_doi","openalex_by_doi","doi_org_head"]},"severity":"critical","ref_index":11,"audited_at":"2026-05-21T06:43:37.477561Z","event_type":"pith.integrity.v1","detected_doi":"10.1109/access.2024.3353678","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"unresolvable_identifier","evidence_hash":"fa218d1346d8ae4caf71c34e859e497e5643b14f1b587394daa9c06e51fdd506","paper_version":1,"verdict_class":"cross_source","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":5807,"payload_sha256":"8a5bc8c5a2894da85fe8a39abd256446f6e85da0f723d11842a78419d87a5e5e","signature_b64":"aP1Y1hLXE2J7zuxHum0cciiRkkhBjNQdYxcM+GUxLjYfoYZtxdMhWIA01iKcHM9b5yy5nV4QCNDHC/WXx8BUCw==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T06:44:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y1ggncXzmHNGtWKr8dwLpgILc4n9EiPdg1ml/4+CTX+C06rFs2H8/QjW2s7mZBp9muGBT8iYi1iuCXM2zix/BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:26:23.950197Z"},"content_sha256":"bc5b80465fce64a2451537bc7d3198050e7330f7e7abb97d47d47f96bc35a804","schema_version":"1.0","event_id":"sha256:bc5b80465fce64a2451537bc7d3198050e7330f7e7abb97d47d47f96bc35a804"},{"event_type":"integrity_finding","subject_pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","target":"integrity","payload":{"note":"Identifier '10.1007/978-0-12-410398-7' is syntactically valid but the DOI registry (doi.org) returned 404, and Crossref / OpenAlex / internal corpus also have no record. The cited work could not be located through any authoritative source.","snippet":"Felderer, M., et al.: Testing data -intensive software systems. In: Perspectives on Data Science for Software Engineering, pp. 181 –200. Springer, Cham (2019). https://doi.org/10.1007/978-0-12-410398-7","arxiv_id":"2605.20500","detector":"doi_compliance","evidence":{"doi":"10.1007/978-0-12-410398-7","arxiv_id":null,"ref_index":10,"raw_excerpt":"Felderer, M., et al.: Testing data -intensive software systems. In: Perspectives on Data Science for Software Engineering, pp. 181 –200. Springer, Cham (2019). https://doi.org/10.1007/978-0-12-410398-7","verdict_class":"cross_source","checked_sources":["crossref_by_doi","openalex_by_doi","doi_org_head"]},"severity":"critical","ref_index":10,"audited_at":"2026-05-21T06:43:37.477561Z","event_type":"pith.integrity.v1","detected_doi":"10.1007/978-0-12-410398-7","detector_url":"https://pith.science/pith-integrity-protocol#doi_compliance","external_url":null,"finding_type":"unresolvable_identifier","evidence_hash":"c4c826dc9e30df2dd0a9b7fbd73a8f5174b7a379e8866c96e23967e13c6615e5","paper_version":1,"verdict_class":"cross_source","resolved_title":null,"detector_version":"1.0.0","detected_arxiv_id":null,"integrity_event_id":5806,"payload_sha256":"ce631b19672f130e0f2ab08ad8e79850999c16ae8c9571618c133f45a722b6ea","signature_b64":"JTdVDKi8LTdhE1iE42iNyd7gQiObbUnLV74TKL96o0aa7vYpA5+GREY4LpUAsE4j53ipOJdPEH944JfOshVhCA==","signing_key_id":"pith-v1-2026-05"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T06:44:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"D4eDvSi+OAeChqXHjyLh3NF92sk5wupEXgkQRLV03h0BkLtGrJDt0pBRZv0yefkrWpKOiCPBmdvkMMsI89YpBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T17:26:23.950523Z"},"content_sha256":"9b796a17145d03ac2d4c7b09fcde869f65ed21a3756a58f17b0fe41f9d148067","schema_version":"1.0","event_id":"sha256:9b796a17145d03ac2d4c7b09fcde869f65ed21a3756a58f17b0fe41f9d148067"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/bundle.json","state_url":"https://pith.science/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T17:26:23Z","links":{"resolver":"https://pith.science/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U","bundle":"https://pith.science/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/bundle.json","state":"https://pith.science/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KDNHC5QKRXD3RFG4JTOLHYIZ3U/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KDNHC5QKRXD3RFG4JTOLHYIZ3U","merge_version":"pith-open-graph-merge-v1","event_count":4,"valid_event_count":4,"invalid_event_count":0,"equivocation_count":1,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"42d95a0dc8360b1178851d29f1b18c2a1fc98abb32be079ff198534386e560e8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-19T21:12:36Z","title_canon_sha256":"7701ef376910fe924b41101b31cd4e444fe0bf5ce1e38736234f0bf6e1dec8b6"},"schema_version":"1.0","source":{"id":"2605.20500","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20500","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20500v1","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20500","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_12","alias_value":"KDNHC5QKRXD3","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_16","alias_value":"KDNHC5QKRXD3RFG4","created_at":"2026-05-21T01:04:39Z"},{"alias_kind":"pith_short_8","alias_value":"KDNHC5QK","created_at":"2026-05-21T01:04:39Z"}],"graph_snapshots":[{"event_id":"sha256:41dbbf4690df75004f29c585fa81af4498762466ed5ebf0579f050d1950ce0b6","target":"graph","created_at":"2026-05-21T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.20500/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Ensuring data quality in cloud-native Extract-Load-Transform (ELT) pipelines is increasingly challenging due to heterogeneous data sources, evolving schemas, and multi-backend execution environments. This paper presents a unified, multi-layer testing framework that integrates orchestration-level validation, declarative dbt tests, large language model (LLM)-generated semantic tests, and cross-store consistency checking between DuckDB and Snowflake, orchestrated through Apache Airflow. Controlled anomaly-injection experiments demonstrate that a manual-only baseline detected 7 of 16 injected anom","authors_text":"Hassan Reza, Ismail Gargouri","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-19T21:12:36Z","title":"A Multi-Layer Testing Framework for Automated Data Quality Assurance in Cloud-Native ELT Pipelines"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20500","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:08ca7f7c8906ae95c25de5cea7701d6e5a47cf84ae15ccf0825963cf96d97d66","target":"record","created_at":"2026-05-21T01:04:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"42d95a0dc8360b1178851d29f1b18c2a1fc98abb32be079ff198534386e560e8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-05-19T21:12:36Z","title_canon_sha256":"7701ef376910fe924b41101b31cd4e444fe0bf5ce1e38736234f0bf6e1dec8b6"},"schema_version":"1.0","source":{"id":"2605.20500","kind":"arxiv","version":1}},"canonical_sha256":"50da71760a8dc7b894dc4cdcb3e119dd32498fc73de778f9446ea3fc574756e0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"50da71760a8dc7b894dc4cdcb3e119dd32498fc73de778f9446ea3fc574756e0","first_computed_at":"2026-05-21T01:04:39.954194Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:39.954194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QIOrBIdIrYVnwvxsKhJVAp2icmr5chSjEM+GDvLUgKc6Td6htow1/5+N4abr7xFNs2KOUwAtw8hpb7uM1twsBA==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:39.954625Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.20500","source_kind":"arxiv","source_version":1}}},"equivocations":[{"signer_id":"pith.science","event_type":"integrity_finding","target":"integrity","event_ids":["sha256:9b796a17145d03ac2d4c7b09fcde869f65ed21a3756a58f17b0fe41f9d148067","sha256:bc5b80465fce64a2451537bc7d3198050e7330f7e7abb97d47d47f96bc35a804"]}],"invalid_events":[],"applied_event_ids":["sha256:08ca7f7c8906ae95c25de5cea7701d6e5a47cf84ae15ccf0825963cf96d97d66","sha256:41dbbf4690df75004f29c585fa81af4498762466ed5ebf0579f050d1950ce0b6"],"state_sha256":"4e7ab4f7501461e15e56c9664be2cd39a2dba87ae4c0223694d45e8aaba1a325"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yrf4H0vZpnPxLNKxXNnWdyNwiXx5+XatDG65CbSsafi5UfCWb34bbny5w4OrNlawM9bUPmhrz/tBfKXnV0JCBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T17:26:23.953150Z","bundle_sha256":"9fd9bda799bc88c67bfaca55b14ab7bd070ee94d5b731c7fc5256f25f0185541"}}