{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:N6EZ6AJSJIPNRRUHTAH4BSXANG","short_pith_number":"pith:N6EZ6AJS","schema_version":"1.0","canonical_sha256":"6f899f01324a1ed8c687980fc0cae069b71a49d814411a825d1cbc9d825eaf37","source":{"kind":"arxiv","id":"2605.15345","version":1},"attestation_state":"computed","paper":{"title":"Topical Shifts in the Dark Web: A Longitudinal Analysis of Content from the Cybercrime Ecosystem","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Dark web cybercrime discussions concentrate 75% of their volume in a small set of persistent core topics that last a median of 75 months.","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Irdin Pekaric, Luca Allodi, Maximilian Schafer, Philipp Zech, Raffaela Groner, Roy Ricaldi","submitted_at":"2026-05-14T19:14:53Z","abstract_excerpt":"The dark web hosts a dynamic ecosystem of cybercrime forums and marketplaces that adapt to law enforcement pressure, technological change, and economic incentives. Prior research has extracted cyber threat intelligence from these platforms using static snapshots, with limited attention to how discussions evolve over time. In this study, we conduct a longitudinal analysis of 25,065 websites in the dark web using 11,403,638 HTML snapshots (approximately 1245.38 GB) collected over six years. We develop a longitudinal topic-modeling framework combining domain-specific embeddings, density-based clu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15345","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CR","submitted_at":"2026-05-14T19:14:53Z","cross_cats_sorted":[],"title_canon_sha256":"c5c1fd099ef8c604eb8d938da6dc661dc44d9d044e2578fcf87caee96a9c8aae","abstract_canon_sha256":"2ec8eeba59bf08dfee022acefd617dd72f373e6816ea11d7989261cb13633fe6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:53.617614Z","signature_b64":"5OdtD9i/FOAU1RYC6mTkTFTU+pN8pizJJpXxZGbf73ORfVsAtTi/todfmz21AvxBYS6HSgUWKMscRMw1RfILBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6f899f01324a1ed8c687980fc0cae069b71a49d814411a825d1cbc9d825eaf37","last_reissued_at":"2026-05-20T00:00:53.616821Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:53.616821Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Topical Shifts in the Dark Web: A Longitudinal Analysis of Content from the Cybercrime Ecosystem","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Dark web cybercrime discussions concentrate 75% of their volume in a small set of persistent core topics that last a median of 75 months.","cross_cats":[],"primary_cat":"cs.CR","authors_text":"Irdin Pekaric, Luca Allodi, Maximilian Schafer, Philipp Zech, Raffaela Groner, Roy Ricaldi","submitted_at":"2026-05-14T19:14:53Z","abstract_excerpt":"The dark web hosts a dynamic ecosystem of cybercrime forums and marketplaces that adapt to law enforcement pressure, technological change, and economic incentives. Prior research has extracted cyber threat intelligence from these platforms using static snapshots, with limited attention to how discussions evolve over time. In this study, we conduct a longitudinal analysis of 25,065 websites in the dark web using 11,403,638 HTML snapshots (approximately 1245.38 GB) collected over six years. We develop a longitudinal topic-modeling framework combining domain-specific embeddings, density-based clu"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"approximately 75% of total discussion volume is concentrated in a small set of persistent core topics, while short-lived themes account for approximately 3% of activity. The median topic lifespan is 75 months, indicating gradual thematic evolution rather than abrupt replacement.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The longitudinal topic-modeling framework that combines domain-specific embeddings, density-based clustering and temporal aggregation correctly identifies thematic clusters and measures their prevalence and lifespan at the website level without major distortion from snapshot collection biases or hyperparameter choices.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Longitudinal topic modeling on a large dark web dataset finds 75% of discussion volume in persistent core topics with a median lifespan of 75 months and only 3% in short-lived themes.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Dark web cybercrime discussions concentrate 75% of their volume in a small set of persistent core topics that last a median of 75 months.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"7441a4aa2d40e15efddfb5ee436259e88f1d80a0c87cf3d06bd129eed29dda1a"},"source":{"id":"2605.15345","kind":"arxiv","version":1},"verdict":{"id":"57300cd2-dd46-4ca5-9312-85aae2738194","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T14:36:23.837877Z","strongest_claim":"approximately 75% of total discussion volume is concentrated in a small set of persistent core topics, while short-lived themes account for approximately 3% of activity. The median topic lifespan is 75 months, indicating gradual thematic evolution rather than abrupt replacement.","one_line_summary":"Longitudinal topic modeling on a large dark web dataset finds 75% of discussion volume in persistent core topics with a median lifespan of 75 months and only 3% in short-lived themes.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The longitudinal topic-modeling framework that combines domain-specific embeddings, density-based clustering and temporal aggregation correctly identifies thematic clusters and measures their prevalence and lifespan at the website level without major distortion from snapshot collection biases or hyperparameter choices.","pith_extraction_headline":"Dark web cybercrime discussions concentrate 75% of their volume in a small set of persistent core topics that last a median of 75 months."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15345/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T15:01:17.712565Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T14:50:24.842087Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.207585Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.753197Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"27d333657e580c41800068b4c264fc4bf394de2626a604023da52edb77e76548"},"references":{"count":67,"sample":[{"doi":"","year":2020,"title":"Relevance of the deep web to academic research,","work_id":"3d904add-2462-42c7-9158-1009031b5715","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"“Tor metrics,” 2025. [Online]. Available: https:// metrics.torproject.org/","work_id":"f64a45eb-7ad0-482f-a1d2-551a6818c3b6","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"A review of dark web: Trends and future directions,","work_id":"0fbe99bf-b328-4c0e-bcd6-fa40abd67fad","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"The Dark Side of the Web: Towards Understanding Various Data Sources in Cyber Threat Intelligence ,","work_id":"87926dd2-4631-4a0a-b084-ca53f60ac6e7","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"J. Robertson, A. Diab, E. Marin, E. Nunes, V . Paliath, J. Shakar- ian, and P. Shakarian,Darkweb cyber threat intelligence mining. Cambridge University Press, 2017","work_id":"3d56bffc-092c-43a0-8473-943a2276cc08","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":67,"snapshot_sha256":"c05d573cc7ea54a9db72637ee5950c9bf754aa804b3ca605c2041eb2db145ec2","internal_anchors":2},"formal_canon":{"evidence_count":2,"snapshot_sha256":"c646df2741098375fa45a32ebaf535daf9c0239797001a4e97ddace808cd89ba"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15345","created_at":"2026-05-20T00:00:53.616939+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15345v1","created_at":"2026-05-20T00:00:53.616939+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15345","created_at":"2026-05-20T00:00:53.616939+00:00"},{"alias_kind":"pith_short_12","alias_value":"N6EZ6AJSJIPN","created_at":"2026-05-20T00:00:53.616939+00:00"},{"alias_kind":"pith_short_16","alias_value":"N6EZ6AJSJIPNRRUH","created_at":"2026-05-20T00:00:53.616939+00:00"},{"alias_kind":"pith_short_8","alias_value":"N6EZ6AJS","created_at":"2026-05-20T00:00:53.616939+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG","json":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG.json","graph_json":"https://pith.science/api/pith-number/N6EZ6AJSJIPNRRUHTAH4BSXANG/graph.json","events_json":"https://pith.science/api/pith-number/N6EZ6AJSJIPNRRUHTAH4BSXANG/events.json","paper":"https://pith.science/paper/N6EZ6AJS"},"agent_actions":{"view_html":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG","download_json":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG.json","view_paper":"https://pith.science/paper/N6EZ6AJS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15345&json=true","fetch_graph":"https://pith.science/api/pith-number/N6EZ6AJSJIPNRRUHTAH4BSXANG/graph.json","fetch_events":"https://pith.science/api/pith-number/N6EZ6AJSJIPNRRUHTAH4BSXANG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG/action/storage_attestation","attest_author":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG/action/author_attestation","sign_citation":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG/action/citation_signature","submit_replication":"https://pith.science/pith/N6EZ6AJSJIPNRRUHTAH4BSXANG/action/replication_record"}},"created_at":"2026-05-20T00:00:53.616939+00:00","updated_at":"2026-05-20T00:00:53.616939+00:00"}