{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:WBFN7T4AAXQSS5J4IW7PZ23HWR","short_pith_number":"pith:WBFN7T4A","schema_version":"1.0","canonical_sha256":"b04adfcf8005e129753c45befceb67b45c5bb7a640e32e17065fad14637af96b","source":{"kind":"arxiv","id":"1804.08985","version":1},"attestation_state":"computed","paper":{"title":"On-Demand Big Data Integration: A Hybrid ETL Approach for Reproducible Scientific Research","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Ashish Sharma, Helena Galhardas, Lu{\\i}s Veiga, Peter Van Roy, Pradeeban Kathiravelu","submitted_at":"2018-04-24T12:27:06Z","abstract_excerpt":"Scientific research requires access, analysis, and sharing of data that is distributed across various heterogeneous data sources at the scale of the Internet. An eager ETL process constructs an integrated data repository as its first step, integrating and loading data in its entirety from the data sources. The bootstrapping of this process is not efficient for scientific research that requires access to data from very large and typically numerous distributed data sources. a lazy ETL process loads only the metadata, but still eagerly. Lazy ETL is faster in bootstrapping. However, queries on the"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.08985","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-04-24T12:27:06Z","cross_cats_sorted":[],"title_canon_sha256":"bb761c0597974d03febf6838aad8ede30971c4cb0f1ff9739103744f64ec53ab","abstract_canon_sha256":"3799b4b84aec86d939d47712a41307b9ca0e3b250ac5631b088da4bef9c69538"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:36.698001Z","signature_b64":"Apzq7tsZE7CanKQqpftDYAxRm/ZHjRDlLopPXBIkmdoy03gyfpISJ8SHpU1/sK+5twNYDGtgwgqn3CfCdgbGAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b04adfcf8005e129753c45befceb67b45c5bb7a640e32e17065fad14637af96b","last_reissued_at":"2026-05-18T00:17:36.697404Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:36.697404Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"On-Demand Big Data Integration: A Hybrid ETL Approach for Reproducible Scientific Research","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Ashish Sharma, Helena Galhardas, Lu{\\i}s Veiga, Peter Van Roy, Pradeeban Kathiravelu","submitted_at":"2018-04-24T12:27:06Z","abstract_excerpt":"Scientific research requires access, analysis, and sharing of data that is distributed across various heterogeneous data sources at the scale of the Internet. An eager ETL process constructs an integrated data repository as its first step, integrating and loading data in its entirety from the data sources. The bootstrapping of this process is not efficient for scientific research that requires access to data from very large and typically numerous distributed data sources. a lazy ETL process loads only the metadata, but still eagerly. Lazy ETL is faster in bootstrapping. However, queries on the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.08985","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.08985","created_at":"2026-05-18T00:17:36.697485+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.08985v1","created_at":"2026-05-18T00:17:36.697485+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.08985","created_at":"2026-05-18T00:17:36.697485+00:00"},{"alias_kind":"pith_short_12","alias_value":"WBFN7T4AAXQS","created_at":"2026-05-18T12:32:59.047623+00:00"},{"alias_kind":"pith_short_16","alias_value":"WBFN7T4AAXQSS5J4","created_at":"2026-05-18T12:32:59.047623+00:00"},{"alias_kind":"pith_short_8","alias_value":"WBFN7T4A","created_at":"2026-05-18T12:32:59.047623+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR","json":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR.json","graph_json":"https://pith.science/api/pith-number/WBFN7T4AAXQSS5J4IW7PZ23HWR/graph.json","events_json":"https://pith.science/api/pith-number/WBFN7T4AAXQSS5J4IW7PZ23HWR/events.json","paper":"https://pith.science/paper/WBFN7T4A"},"agent_actions":{"view_html":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR","download_json":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR.json","view_paper":"https://pith.science/paper/WBFN7T4A","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.08985&json=true","fetch_graph":"https://pith.science/api/pith-number/WBFN7T4AAXQSS5J4IW7PZ23HWR/graph.json","fetch_events":"https://pith.science/api/pith-number/WBFN7T4AAXQSS5J4IW7PZ23HWR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR/action/storage_attestation","attest_author":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR/action/author_attestation","sign_citation":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR/action/citation_signature","submit_replication":"https://pith.science/pith/WBFN7T4AAXQSS5J4IW7PZ23HWR/action/replication_record"}},"created_at":"2026-05-18T00:17:36.697485+00:00","updated_at":"2026-05-18T00:17:36.697485+00:00"}