{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:FLFBLTZEHMJRUTHF6ADJSBVFYY","short_pith_number":"pith:FLFBLTZE","canonical_record":{"source":{"id":"1804.05020","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2018-04-13T16:39:24Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"ec3eb87a05714bba4af19cb984c396f92ca8dba1563561412ded77f07bcd8eb0","abstract_canon_sha256":"4f281cc98fc5263873576a449fdfb8bcfe48b89f677234460ff3a4f13908fe6e"},"schema_version":"1.0"},"canonical_sha256":"2aca15cf243b131a4ce5f0069906a5c6266366d8cc553033982dcc54c38375fb","source":{"kind":"arxiv","id":"1804.05020","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.05020","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"1804.05020v1","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.05020","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"FLFBLTZEHMJR","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"FLFBLTZEHMJRUTHF","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"FLFBLTZE","created_at":"2026-05-18T12:32:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:FLFBLTZEHMJRUTHF6ADJSBVFYY","target":"record","payload":{"canonical_record":{"source":{"id":"1804.05020","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2018-04-13T16:39:24Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"ec3eb87a05714bba4af19cb984c396f92ca8dba1563561412ded77f07bcd8eb0","abstract_canon_sha256":"4f281cc98fc5263873576a449fdfb8bcfe48b89f677234460ff3a4f13908fe6e"},"schema_version":"1.0"},"canonical_sha256":"2aca15cf243b131a4ce5f0069906a5c6266366d8cc553033982dcc54c38375fb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:18:32.480750Z","signature_b64":"Le1h4c8A+0bAlqRCjwWzvcpo/PdLZgGTvMV5fUGOwokAwbRYVIXaq3cgdpaQNje59Q/X3f4IOVyiEblrr+8nAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2aca15cf243b131a4ce5f0069906a5c6266366d8cc553033982dcc54c38375fb","last_reissued_at":"2026-05-18T00:18:32.480421Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:18:32.480421Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.05020","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"84/qfI/r4PbFLqSFOx1xtqofUKXPnqUeCBVUETL7pLwUrgeEek7F0+IuEvYkqeEUQ9tRYKzvx6BFdDeN1Mz9AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T17:57:19.190269Z"},"content_sha256":"521572fc620ae53181ea853baa304efd604bcb6b2ead30adf765629c9a5003a5","schema_version":"1.0","event_id":"sha256:521572fc620ae53181ea853baa304efd604bcb6b2ead30adf765629c9a5003a5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:FLFBLTZEHMJRUTHF6ADJSBVFYY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Deep Learning Approach to Fast, Format-Agnostic Detection of Malicious Web Content","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CR","authors_text":"Cody Wild, Hillary Sanders, Joshua Saxe, Richard Harang","submitted_at":"2018-04-13T16:39:24Z","abstract_excerpt":"Malicious web content is a serious problem on the Internet today. In this paper we propose a deep learning approach to detecting malevolent web pages. While past work on web content detection has relied on syntactic parsing or on emulation of HTML and Javascript to extract features, our approach operates directly on a language-agnostic stream of tokens extracted directly from static HTML files with a simple regular expression. This makes it fast enough to operate in high-frequency data contexts like firewalls and web proxies, and allows it to avoid the attack surface exposure of complex parsin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.05020","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:18:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8xVLugo4IJVYwfIg0JbH/tOSqRzId4/kZ4fBc6w9DTIGLnj3nXxhHxxJH8VmOSuPeJtlv3mZ1sjxYdlqNQjoBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T17:57:19.190940Z"},"content_sha256":"e3be306358595f4bf35d596ab0fedcc5a19f70329c4f758551ece58db17824f0","schema_version":"1.0","event_id":"sha256:e3be306358595f4bf35d596ab0fedcc5a19f70329c4f758551ece58db17824f0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/bundle.json","state_url":"https://pith.science/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T17:57:19Z","links":{"resolver":"https://pith.science/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY","bundle":"https://pith.science/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/bundle.json","state":"https://pith.science/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FLFBLTZEHMJRUTHF6ADJSBVFYY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:FLFBLTZEHMJRUTHF6ADJSBVFYY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4f281cc98fc5263873576a449fdfb8bcfe48b89f677234460ff3a4f13908fe6e","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2018-04-13T16:39:24Z","title_canon_sha256":"ec3eb87a05714bba4af19cb984c396f92ca8dba1563561412ded77f07bcd8eb0"},"schema_version":"1.0","source":{"id":"1804.05020","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.05020","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"1804.05020v1","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.05020","created_at":"2026-05-18T00:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"FLFBLTZEHMJR","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_16","alias_value":"FLFBLTZEHMJRUTHF","created_at":"2026-05-18T12:32:22Z"},{"alias_kind":"pith_short_8","alias_value":"FLFBLTZE","created_at":"2026-05-18T12:32:22Z"}],"graph_snapshots":[{"event_id":"sha256:e3be306358595f4bf35d596ab0fedcc5a19f70329c4f758551ece58db17824f0","target":"graph","created_at":"2026-05-18T00:18:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Malicious web content is a serious problem on the Internet today. In this paper we propose a deep learning approach to detecting malevolent web pages. While past work on web content detection has relied on syntactic parsing or on emulation of HTML and Javascript to extract features, our approach operates directly on a language-agnostic stream of tokens extracted directly from static HTML files with a simple regular expression. This makes it fast enough to operate in high-frequency data contexts like firewalls and web proxies, and allows it to avoid the attack surface exposure of complex parsin","authors_text":"Cody Wild, Hillary Sanders, Joshua Saxe, Richard Harang","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2018-04-13T16:39:24Z","title":"A Deep Learning Approach to Fast, Format-Agnostic Detection of Malicious Web Content"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.05020","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:521572fc620ae53181ea853baa304efd604bcb6b2ead30adf765629c9a5003a5","target":"record","created_at":"2026-05-18T00:18:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4f281cc98fc5263873576a449fdfb8bcfe48b89f677234460ff3a4f13908fe6e","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2018-04-13T16:39:24Z","title_canon_sha256":"ec3eb87a05714bba4af19cb984c396f92ca8dba1563561412ded77f07bcd8eb0"},"schema_version":"1.0","source":{"id":"1804.05020","kind":"arxiv","version":1}},"canonical_sha256":"2aca15cf243b131a4ce5f0069906a5c6266366d8cc553033982dcc54c38375fb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2aca15cf243b131a4ce5f0069906a5c6266366d8cc553033982dcc54c38375fb","first_computed_at":"2026-05-18T00:18:32.480421Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:18:32.480421Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Le1h4c8A+0bAlqRCjwWzvcpo/PdLZgGTvMV5fUGOwokAwbRYVIXaq3cgdpaQNje59Q/X3f4IOVyiEblrr+8nAg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:18:32.480750Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.05020","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:521572fc620ae53181ea853baa304efd604bcb6b2ead30adf765629c9a5003a5","sha256:e3be306358595f4bf35d596ab0fedcc5a19f70329c4f758551ece58db17824f0"],"state_sha256":"62091c9e13b4239d57d8addfc86e9d06e5204c047d870493941b37d7c49d1d0a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m83rCuTuRtXoUHMt1fXF4uf/3jDXKw3C09F3PccbFjsKuPYqDfeUO5B9jcWkaScv1sTGSDDFK3jDhrZHuj7LBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T17:57:19.194606Z","bundle_sha256":"44f707b253ca0074d78f7dd398c655f29be5d64465eb3d74509b7b1b5714502b"}}