{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TRSP3ZKPYZUWYZHM4OUFUYPVMM","short_pith_number":"pith:TRSP3ZKP","schema_version":"1.0","canonical_sha256":"9c64fde54fc6696c64ece3a85a61f563378a6c89effb247dd4c537b24c1d3ae9","source":{"kind":"arxiv","id":"2602.20213","version":2},"attestation_state":"computed","paper":{"title":"CodeHacker: Automated Test Case Generation for Detecting Vulnerabilities in Competitive Programming Solutions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.SE","authors_text":"Jing Huang, Jingwei Shi, Jinman Zhao, Shengyu Tao, Xinxiang Yin","submitted_at":"2026-02-23T05:59:30Z","abstract_excerpt":"The evaluation of Large Language Models (LLMs) for code generation relies heavily on the quality and robustness of test cases. However, existing benchmarks often lack coverage for subtle corner cases, allowing incorrect solutions to pass. To bridge this gap, we propose CodeHacker, an automated agent framework dedicated to generating targeted adversarial test cases that expose latent vulnerabilities in program submissions. Mimicking the hack mechanism in competitive programming, CodeHacker employs a multi-strategy approach, including stress testing, anti-hash attacks, and logic-specific targeti"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.20213","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-02-23T05:59:30Z","cross_cats_sorted":["cs.AI","cs.CR"],"title_canon_sha256":"e888b6c230005b95b6c99f5ec3d6a541f059e40d6d6099470a82dce3fda85d90","abstract_canon_sha256":"d6059ac171fef7174917cb2328ffd6b4d767df5d403b56ad1810b8cf97e183ea"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:10.856954Z","signature_b64":"3P0ci0PpOIn+sMg8u9w+fRkWOv+sRP/smAunjg+hKN+WzIMZQwSlxpxJxCuICbL+GmdzazHbvp/k2VbpF9jpCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9c64fde54fc6696c64ece3a85a61f563378a6c89effb247dd4c537b24c1d3ae9","last_reissued_at":"2026-06-03T01:05:10.856449Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:10.856449Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CodeHacker: Automated Test Case Generation for Detecting Vulnerabilities in Competitive Programming Solutions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CR"],"primary_cat":"cs.SE","authors_text":"Jing Huang, Jingwei Shi, Jinman Zhao, Shengyu Tao, Xinxiang Yin","submitted_at":"2026-02-23T05:59:30Z","abstract_excerpt":"The evaluation of Large Language Models (LLMs) for code generation relies heavily on the quality and robustness of test cases. However, existing benchmarks often lack coverage for subtle corner cases, allowing incorrect solutions to pass. To bridge this gap, we propose CodeHacker, an automated agent framework dedicated to generating targeted adversarial test cases that expose latent vulnerabilities in program submissions. Mimicking the hack mechanism in competitive programming, CodeHacker employs a multi-strategy approach, including stress testing, anti-hash attacks, and logic-specific targeti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.20213","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.20213/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.20213","created_at":"2026-06-03T01:05:10.856509+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.20213v2","created_at":"2026-06-03T01:05:10.856509+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.20213","created_at":"2026-06-03T01:05:10.856509+00:00"},{"alias_kind":"pith_short_12","alias_value":"TRSP3ZKPYZUW","created_at":"2026-06-03T01:05:10.856509+00:00"},{"alias_kind":"pith_short_16","alias_value":"TRSP3ZKPYZUWYZHM","created_at":"2026-06-03T01:05:10.856509+00:00"},{"alias_kind":"pith_short_8","alias_value":"TRSP3ZKP","created_at":"2026-06-03T01:05:10.856509+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2605.15301","citing_title":"Solvita: Enhancing Large Language Models for Competitive Programming via Agentic Evolution","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27998","citing_title":"Latent-GRPO: Group Relative Policy Optimization for Latent Reasoning","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08553","citing_title":"VeriContest: A Competitive-Programming Benchmark for Verifiable Code Generation","ref_index":40,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM","json":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM.json","graph_json":"https://pith.science/api/pith-number/TRSP3ZKPYZUWYZHM4OUFUYPVMM/graph.json","events_json":"https://pith.science/api/pith-number/TRSP3ZKPYZUWYZHM4OUFUYPVMM/events.json","paper":"https://pith.science/paper/TRSP3ZKP"},"agent_actions":{"view_html":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM","download_json":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM.json","view_paper":"https://pith.science/paper/TRSP3ZKP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.20213&json=true","fetch_graph":"https://pith.science/api/pith-number/TRSP3ZKPYZUWYZHM4OUFUYPVMM/graph.json","fetch_events":"https://pith.science/api/pith-number/TRSP3ZKPYZUWYZHM4OUFUYPVMM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM/action/storage_attestation","attest_author":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM/action/author_attestation","sign_citation":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM/action/citation_signature","submit_replication":"https://pith.science/pith/TRSP3ZKPYZUWYZHM4OUFUYPVMM/action/replication_record"}},"created_at":"2026-06-03T01:05:10.856509+00:00","updated_at":"2026-06-03T01:05:10.856509+00:00"}