{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:IECCTJOMAXXKJ5FJ2ETID6MJE6","short_pith_number":"pith:IECCTJOM","schema_version":"1.0","canonical_sha256":"410429a5cc05eea4f4a9d12681f98927a155fcbe3d43ca9926a78033b9498da7","source":{"kind":"arxiv","id":"2606.16072","version":2},"attestation_state":"computed","paper":{"title":"MASCOT-Android: A Curated Dataset and Automated Collection Pipeline for Android Malware Source Code Specimens","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Bojing Li, Charles Maxa, Charles Nicholas, Duo Zhong, Prajna Bhandary, Raguvir S, Robert J Joyce","submitted_at":"2026-06-15T00:15:04Z","abstract_excerpt":"Compared with binaries and decompiled code, malware source code more directly reflects the attackers' original intent. However, the scarcity of source code and the high cost of manual review make such datasets difficult to build and maintain. We propose MASCOT-Android, a curated dataset of Android malware source code and an automated collection framework for scalable malware source code discovery on GitHub. A key finding of our work is that repository-level documentation alone provides a strong signal for malware source code collection. Our model extracts character-level TF-IDF features from 8"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.16072","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-06-15T00:15:04Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b4f887f1e2d978292cb3208bea3ca1296d6e7f57e380c4f3fcb96e1b7a9f9001","abstract_canon_sha256":"3e8f9654c092ddf5f14c448a61d9ea97fe820356dae26cb548f56d7ac1da81fa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:10:40.150359Z","signature_b64":"Okwdus7cjZ7sXukm/BH38o/j8xZJhOlD0+9ET2c6194UMHHqlIL6juBFNsqiAeBItzbKGWzFLubBu8M7l47vCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"410429a5cc05eea4f4a9d12681f98927a155fcbe3d43ca9926a78033b9498da7","last_reissued_at":"2026-06-19T16:10:40.149899Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:10:40.149899Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MASCOT-Android: A Curated Dataset and Automated Collection Pipeline for Android Malware Source Code Specimens","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CR","authors_text":"Bojing Li, Charles Maxa, Charles Nicholas, Duo Zhong, Prajna Bhandary, Raguvir S, Robert J Joyce","submitted_at":"2026-06-15T00:15:04Z","abstract_excerpt":"Compared with binaries and decompiled code, malware source code more directly reflects the attackers' original intent. However, the scarcity of source code and the high cost of manual review make such datasets difficult to build and maintain. We propose MASCOT-Android, a curated dataset of Android malware source code and an automated collection framework for scalable malware source code discovery on GitHub. A key finding of our work is that repository-level documentation alone provides a strong signal for malware source code collection. Our model extracts character-level TF-IDF features from 8"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.16072","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.16072/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.16072","created_at":"2026-06-19T16:10:40.149964+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.16072v2","created_at":"2026-06-19T16:10:40.149964+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.16072","created_at":"2026-06-19T16:10:40.149964+00:00"},{"alias_kind":"pith_short_12","alias_value":"IECCTJOMAXXK","created_at":"2026-06-19T16:10:40.149964+00:00"},{"alias_kind":"pith_short_16","alias_value":"IECCTJOMAXXKJ5FJ","created_at":"2026-06-19T16:10:40.149964+00:00"},{"alias_kind":"pith_short_8","alias_value":"IECCTJOM","created_at":"2026-06-19T16:10:40.149964+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6","json":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6.json","graph_json":"https://pith.science/api/pith-number/IECCTJOMAXXKJ5FJ2ETID6MJE6/graph.json","events_json":"https://pith.science/api/pith-number/IECCTJOMAXXKJ5FJ2ETID6MJE6/events.json","paper":"https://pith.science/paper/IECCTJOM"},"agent_actions":{"view_html":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6","download_json":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6.json","view_paper":"https://pith.science/paper/IECCTJOM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.16072&json=true","fetch_graph":"https://pith.science/api/pith-number/IECCTJOMAXXKJ5FJ2ETID6MJE6/graph.json","fetch_events":"https://pith.science/api/pith-number/IECCTJOMAXXKJ5FJ2ETID6MJE6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6/action/storage_attestation","attest_author":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6/action/author_attestation","sign_citation":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6/action/citation_signature","submit_replication":"https://pith.science/pith/IECCTJOMAXXKJ5FJ2ETID6MJE6/action/replication_record"}},"created_at":"2026-06-19T16:10:40.149964+00:00","updated_at":"2026-06-19T16:10:40.149964+00:00"}