{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:7VOGYQEWISFINUGPUOPPBWCGQM","short_pith_number":"pith:7VOGYQEW","schema_version":"1.0","canonical_sha256":"fd5c6c4096448a86d0cfa39ef0d8468324dbc326a7bf7ed096e6aafe8fe25c3b","source":{"kind":"arxiv","id":"1906.08076","version":1},"attestation_state":"computed","paper":{"title":"Growth and Duplication of Public Source Code over Time: Provenance Tracking at Scale","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Guillaume Rousseau (UPD7), Roberto Di Cosmo (IRIF), Stefano Zacchiroli (IRIF)","submitted_at":"2019-06-19T12:52:13Z","abstract_excerpt":"We study the evolution of the largest known corpus of publicly available source code, i.e., the Software Heritage archive (4B unique source code files, 1B commits capturing their development histories across 50M software projects). On such corpus we quantify the growth rate of original, never-seen-before source code files and commits. We find the growth rates to be exponential over a period of more than 40 years.We then estimate the multiplication factor, i.e., how much the same artifacts (e.g., files or commits) appear in different contexts (e.g., commits or source code distribution places). "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.08076","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SE","submitted_at":"2019-06-19T12:52:13Z","cross_cats_sorted":[],"title_canon_sha256":"1527fb2655912f6e300222210ec016958c4d0f4342077451e28e39b980158424","abstract_canon_sha256":"6849209dfbc59bde7a7d65c5c2602d9d140dbd68ff7392cd3b6fce08a62e8905"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:54.649449Z","signature_b64":"iq1pQ1Y6AM3WYPLqLPMoWF/dWS+rQU516kv9bnDfDGib1lsd9zt72i6TYeapi5ERzGyWxRfGCtdoxOYUtXHBCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fd5c6c4096448a86d0cfa39ef0d8468324dbc326a7bf7ed096e6aafe8fe25c3b","last_reissued_at":"2026-05-17T23:42:54.648388Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:54.648388Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Growth and Duplication of Public Source Code over Time: Provenance Tracking at Scale","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Guillaume Rousseau (UPD7), Roberto Di Cosmo (IRIF), Stefano Zacchiroli (IRIF)","submitted_at":"2019-06-19T12:52:13Z","abstract_excerpt":"We study the evolution of the largest known corpus of publicly available source code, i.e., the Software Heritage archive (4B unique source code files, 1B commits capturing their development histories across 50M software projects). On such corpus we quantify the growth rate of original, never-seen-before source code files and commits. We find the growth rates to be exponential over a period of more than 40 years.We then estimate the multiplication factor, i.e., how much the same artifacts (e.g., files or commits) appear in different contexts (e.g., commits or source code distribution places). "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.08076","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.08076","created_at":"2026-05-17T23:42:54.648457+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.08076v1","created_at":"2026-05-17T23:42:54.648457+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.08076","created_at":"2026-05-17T23:42:54.648457+00:00"},{"alias_kind":"pith_short_12","alias_value":"7VOGYQEWISFI","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_16","alias_value":"7VOGYQEWISFINUGP","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_8","alias_value":"7VOGYQEW","created_at":"2026-05-18T12:33:12.712433+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM","json":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM.json","graph_json":"https://pith.science/api/pith-number/7VOGYQEWISFINUGPUOPPBWCGQM/graph.json","events_json":"https://pith.science/api/pith-number/7VOGYQEWISFINUGPUOPPBWCGQM/events.json","paper":"https://pith.science/paper/7VOGYQEW"},"agent_actions":{"view_html":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM","download_json":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM.json","view_paper":"https://pith.science/paper/7VOGYQEW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.08076&json=true","fetch_graph":"https://pith.science/api/pith-number/7VOGYQEWISFINUGPUOPPBWCGQM/graph.json","fetch_events":"https://pith.science/api/pith-number/7VOGYQEWISFINUGPUOPPBWCGQM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM/action/storage_attestation","attest_author":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM/action/author_attestation","sign_citation":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM/action/citation_signature","submit_replication":"https://pith.science/pith/7VOGYQEWISFINUGPUOPPBWCGQM/action/replication_record"}},"created_at":"2026-05-17T23:42:54.648457+00:00","updated_at":"2026-05-17T23:42:54.648457+00:00"}