{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:PZWYOZIHRAYB6AFE2YYSRMK7WX","short_pith_number":"pith:PZWYOZIH","canonical_record":{"source":{"id":"1805.10787","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-05-28T06:57:40Z","cross_cats_sorted":[],"title_canon_sha256":"770c523b62da78a7118ea7f3789a3e065569e44181b4f25dec0630b5e83ece38","abstract_canon_sha256":"c97e09592194f74567e623744a80b4c679af2be993b39fc0db49f46eaa52398e"},"schema_version":"1.0"},"canonical_sha256":"7e6d87650788301f00a4d63128b15fb5e4b238dff49bf72f4dbe437554d44670","source":{"kind":"arxiv","id":"1805.10787","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.10787","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"arxiv_version","alias_value":"1805.10787v1","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.10787","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"pith_short_12","alias_value":"PZWYOZIHRAYB","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"PZWYOZIHRAYB6AFE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"PZWYOZIH","created_at":"2026-05-18T12:32:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:PZWYOZIHRAYB6AFE2YYSRMK7WX","target":"record","payload":{"canonical_record":{"source":{"id":"1805.10787","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-05-28T06:57:40Z","cross_cats_sorted":[],"title_canon_sha256":"770c523b62da78a7118ea7f3789a3e065569e44181b4f25dec0630b5e83ece38","abstract_canon_sha256":"c97e09592194f74567e623744a80b4c679af2be993b39fc0db49f46eaa52398e"},"schema_version":"1.0"},"canonical_sha256":"7e6d87650788301f00a4d63128b15fb5e4b238dff49bf72f4dbe437554d44670","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:51.428305Z","signature_b64":"t2pM7LRxwVT6Zg/PtgPG0e1XiEZiNV8bubHwpNycNrhQraozNR5OxEU+owwMyxa3NdCdqVSEEWw1GNCRj6KsCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7e6d87650788301f00a4d63128b15fb5e4b238dff49bf72f4dbe437554d44670","last_reissued_at":"2026-05-18T00:14:51.427790Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:51.427790Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.10787","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qJ+j3B51c6aJmlLgR5bgEvTeC8XM8bHSrI8PjTCAIuPb4o0k5OnN5CfmOEtBN0dcZKd+lcpOSn3v1rgJ5eymDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T23:43:38.972813Z"},"content_sha256":"b63836cfe7bbad0661c5374069d05757e34237e103d3894dae2261e83d17d87a","schema_version":"1.0","event_id":"sha256:b63836cfe7bbad0661c5374069d05757e34237e103d3894dae2261e83d17d87a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:PZWYOZIHRAYB6AFE2YYSRMK7WX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"An empirical study of public data quality problems in cross project defect prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Heli Sun, Junqi Li, Zhongbin Sun","submitted_at":"2018-05-28T06:57:40Z","abstract_excerpt":"Background: Two public defect data, including Jureczko and NASA datasets, have been widely used in cross project defect prediction (CPDP). The quality of defect data have been reported as an important factor influencing the defect prediction performance and Shepperd et al. have researched the data quality problems in NASA datasets. However, up to now, there is no research focusing on the quality problems of Jureczko datasets which are most widely used in CPDP. Aims: In this paper, we intend to investigate the problems of identical and inconsistent cases in Jureczko datasets and validate whethe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.10787","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"25E6r7gltdeiHcTSHpJRG2PGEycoXx3k/SH40fxwzU3e678ca5V+hVRFZwmogTAueUrNKJH42ddZgxvvJMP+Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T23:43:38.973459Z"},"content_sha256":"359fa5e6453db0062378512b76e94400594a84602f87f82993d962ed6f4947e4","schema_version":"1.0","event_id":"sha256:359fa5e6453db0062378512b76e94400594a84602f87f82993d962ed6f4947e4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/bundle.json","state_url":"https://pith.science/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T23:43:38Z","links":{"resolver":"https://pith.science/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX","bundle":"https://pith.science/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/bundle.json","state":"https://pith.science/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PZWYOZIHRAYB6AFE2YYSRMK7WX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:PZWYOZIHRAYB6AFE2YYSRMK7WX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c97e09592194f74567e623744a80b4c679af2be993b39fc0db49f46eaa52398e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-05-28T06:57:40Z","title_canon_sha256":"770c523b62da78a7118ea7f3789a3e065569e44181b4f25dec0630b5e83ece38"},"schema_version":"1.0","source":{"id":"1805.10787","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.10787","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"arxiv_version","alias_value":"1805.10787v1","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.10787","created_at":"2026-05-18T00:14:51Z"},{"alias_kind":"pith_short_12","alias_value":"PZWYOZIHRAYB","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"PZWYOZIHRAYB6AFE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"PZWYOZIH","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:359fa5e6453db0062378512b76e94400594a84602f87f82993d962ed6f4947e4","target":"graph","created_at":"2026-05-18T00:14:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Background: Two public defect data, including Jureczko and NASA datasets, have been widely used in cross project defect prediction (CPDP). The quality of defect data have been reported as an important factor influencing the defect prediction performance and Shepperd et al. have researched the data quality problems in NASA datasets. However, up to now, there is no research focusing on the quality problems of Jureczko datasets which are most widely used in CPDP. Aims: In this paper, we intend to investigate the problems of identical and inconsistent cases in Jureczko datasets and validate whethe","authors_text":"Heli Sun, Junqi Li, Zhongbin Sun","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-05-28T06:57:40Z","title":"An empirical study of public data quality problems in cross project defect prediction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.10787","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b63836cfe7bbad0661c5374069d05757e34237e103d3894dae2261e83d17d87a","target":"record","created_at":"2026-05-18T00:14:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c97e09592194f74567e623744a80b4c679af2be993b39fc0db49f46eaa52398e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2018-05-28T06:57:40Z","title_canon_sha256":"770c523b62da78a7118ea7f3789a3e065569e44181b4f25dec0630b5e83ece38"},"schema_version":"1.0","source":{"id":"1805.10787","kind":"arxiv","version":1}},"canonical_sha256":"7e6d87650788301f00a4d63128b15fb5e4b238dff49bf72f4dbe437554d44670","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7e6d87650788301f00a4d63128b15fb5e4b238dff49bf72f4dbe437554d44670","first_computed_at":"2026-05-18T00:14:51.427790Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:14:51.427790Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"t2pM7LRxwVT6Zg/PtgPG0e1XiEZiNV8bubHwpNycNrhQraozNR5OxEU+owwMyxa3NdCdqVSEEWw1GNCRj6KsCQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:14:51.428305Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.10787","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b63836cfe7bbad0661c5374069d05757e34237e103d3894dae2261e83d17d87a","sha256:359fa5e6453db0062378512b76e94400594a84602f87f82993d962ed6f4947e4"],"state_sha256":"ca3bf81f5a77d7ee7b90378ffc0cbfa4094d97c4f164bd44cb8499ebb9d78641"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Zn1JXOHXoC8Rq4LHBoQfGxjMSVaoio9IEq0CTAESmpyCmPufeeUBpje/uDCmUV+kqOs034KoYYGzEKkTmwtdBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T23:43:38.976168Z","bundle_sha256":"3ea135c1c3dfdfaa9711c0dc5bda66b777630a908db77ea5959ab7c60714a72f"}}