{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:EG5IL7QVEKHM3LOUXWQ2KDRL4R","short_pith_number":"pith:EG5IL7QV","schema_version":"1.0","canonical_sha256":"21ba85fe15228ecdadd4bda1a50e2be472fd816b1f024ced458c03380c8e6441","source":{"kind":"arxiv","id":"2408.02153","version":2},"attestation_state":"computed","paper":{"title":"ARVO: Atlas of Reproducible Vulnerabilities for Open-Source Software","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Abdelouahab Benchikh, Adam Doup\\'e, Brendan Dolan-Gavitt, Hammond Pearce, Haoran Xi, Jordi Del Castillo, Pulkit Singh Singaria, Ruoyu Wang, Tiffany Bao, Xiang Mei, Yan Shoshitaishvili","submitted_at":"2024-08-04T22:13:14Z","abstract_excerpt":"Achieving reproducibility, quantity, and diversity in vulnerability datasets has long been viewed as an inherent three-way trade-off, where improving one dimension often comes at the cost of the others. In practice, reproducibility has been the dimension most often neglected. This has limited what can be automatically extracted from historical bug datasets, and has reduced their utility for downstream security research.\n  In this work, we propose a method to produce a new security dataset which ensures reproducibility for diverse vulnerabilities at scale by identifying the key obstacles to lar"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2408.02153","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2024-08-04T22:13:14Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"901638715b44f920ee9a232de52293403ad76983a5a67fbc70b79ee705498e8c","abstract_canon_sha256":"72a65011d2c16f796f36bac752a4f938be22fed9de8dfaa199c1c1b1366d13a5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:11:52.558775Z","signature_b64":"aUn7/zIYKA7nwgfB+pXMprJokcS2wIny2JqHrZpRJeCxe4H0o9J5VUFg7WQ+7dAwlgqOY6AoTegeSVMHlkPgDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"21ba85fe15228ecdadd4bda1a50e2be472fd816b1f024ced458c03380c8e6441","last_reissued_at":"2026-06-23T01:11:52.558183Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:11:52.558183Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"ARVO: Atlas of Reproducible Vulnerabilities for Open-Source Software","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CR","authors_text":"Abdelouahab Benchikh, Adam Doup\\'e, Brendan Dolan-Gavitt, Hammond Pearce, Haoran Xi, Jordi Del Castillo, Pulkit Singh Singaria, Ruoyu Wang, Tiffany Bao, Xiang Mei, Yan Shoshitaishvili","submitted_at":"2024-08-04T22:13:14Z","abstract_excerpt":"Achieving reproducibility, quantity, and diversity in vulnerability datasets has long been viewed as an inherent three-way trade-off, where improving one dimension often comes at the cost of the others. In practice, reproducibility has been the dimension most often neglected. This has limited what can be automatically extracted from historical bug datasets, and has reduced their utility for downstream security research.\n  In this work, we propose a method to produce a new security dataset which ensures reproducibility for diverse vulnerabilities at scale by identifying the key obstacles to lar"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2408.02153","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2408.02153/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2408.02153","created_at":"2026-06-23T01:11:52.558270+00:00"},{"alias_kind":"arxiv_version","alias_value":"2408.02153v2","created_at":"2026-06-23T01:11:52.558270+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2408.02153","created_at":"2026-06-23T01:11:52.558270+00:00"},{"alias_kind":"pith_short_12","alias_value":"EG5IL7QVEKHM","created_at":"2026-06-23T01:11:52.558270+00:00"},{"alias_kind":"pith_short_16","alias_value":"EG5IL7QVEKHM3LOU","created_at":"2026-06-23T01:11:52.558270+00:00"},{"alias_kind":"pith_short_8","alias_value":"EG5IL7QV","created_at":"2026-06-23T01:11:52.558270+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":7,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"2605.15097","citing_title":"Veritas: A Semantically Grounded Agentic Framework for Memory Corruption Vulnerability Detection in Binaries","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14153","citing_title":"ExploitBench: A Capability Ladder Benchmark for LLM Cybersecurity Agents","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03851","citing_title":"Beyond Crash-to-Patch: Patch Evolution for Linux Kernel Repair","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10250","citing_title":"Organizational Security Resource Estimation via Vulnerability Queueing","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10427","citing_title":"A Queueing-Theoretic Framework for Dynamic Attack Surfaces: Data-Integrated Risk Analysis and Adaptive Defense","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07624","citing_title":"Program Analysis Guided LLM Agent for Proof-of-Concept Generation","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06618","citing_title":"PoC-Adapt: Semantic-Aware Automated Vulnerability Reproduction with LLM Multi-Agents and Reinforcement Learning-Driven Adaptive Policy","ref_index":11,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R","json":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R.json","graph_json":"https://pith.science/api/pith-number/EG5IL7QVEKHM3LOUXWQ2KDRL4R/graph.json","events_json":"https://pith.science/api/pith-number/EG5IL7QVEKHM3LOUXWQ2KDRL4R/events.json","paper":"https://pith.science/paper/EG5IL7QV"},"agent_actions":{"view_html":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R","download_json":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R.json","view_paper":"https://pith.science/paper/EG5IL7QV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2408.02153&json=true","fetch_graph":"https://pith.science/api/pith-number/EG5IL7QVEKHM3LOUXWQ2KDRL4R/graph.json","fetch_events":"https://pith.science/api/pith-number/EG5IL7QVEKHM3LOUXWQ2KDRL4R/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R/action/storage_attestation","attest_author":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R/action/author_attestation","sign_citation":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R/action/citation_signature","submit_replication":"https://pith.science/pith/EG5IL7QVEKHM3LOUXWQ2KDRL4R/action/replication_record"}},"created_at":"2026-06-23T01:11:52.558270+00:00","updated_at":"2026-06-23T01:11:52.558270+00:00"}