{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:DAZZRDN5UARHGD4ZBMTGGJQI6H","short_pith_number":"pith:DAZZRDN5","schema_version":"1.0","canonical_sha256":"1833988dbda022730f990b26632608f1f30113115a323f9e95346e47f1cb4aa1","source":{"kind":"arxiv","id":"2507.15584","version":2},"attestation_state":"computed","paper":{"title":"We Need to Rethink Benchmarking in Anomaly Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Daniel Schl\\\"or, Emmanuel M\\\"uller, Franz Rothlauf, Kevin Kammler, Philipp R\\\"ochner, Simon Kl\\\"uttermann","submitted_at":"2025-07-21T13:02:49Z","abstract_excerpt":"Despite the continuous proposal of new anomaly detection algorithms and extensive benchmarking efforts, progress seems to stagnate, with only minor performance differences between established baselines and new algorithms. In this position paper, we argue that this stagnation is due to limitations in how we evaluate anomaly detection algorithms. In current benchmarks, a trivial algorithm that only checks for extreme values in individual features performs competitively with state-of-the-art deep learning methods, despite failing on simple cases such as anomalies within an annulus of normal point"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2507.15584","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-07-21T13:02:49Z","cross_cats_sorted":[],"title_canon_sha256":"6577daee06eccd99582bb40a2cdbb21d59845b8457e98930851f618d85a0c20b","abstract_canon_sha256":"f4ae0e4d108cd3680fb221db7dad00b9d09f73013f9f52955d1cefeb04cd4f4f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:12:46.850253Z","signature_b64":"/G589nK4OmnQz4Blai5uY5tEvPxRnWQkwJBaeiRVu9NK03RKMzHMPB1bJZfmDLNAXguQ6z6TgZS1yDY8cPkKAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1833988dbda022730f990b26632608f1f30113115a323f9e95346e47f1cb4aa1","last_reissued_at":"2026-06-19T16:12:46.849822Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:12:46.849822Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"We Need to Rethink Benchmarking in Anomaly Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Daniel Schl\\\"or, Emmanuel M\\\"uller, Franz Rothlauf, Kevin Kammler, Philipp R\\\"ochner, Simon Kl\\\"uttermann","submitted_at":"2025-07-21T13:02:49Z","abstract_excerpt":"Despite the continuous proposal of new anomaly detection algorithms and extensive benchmarking efforts, progress seems to stagnate, with only minor performance differences between established baselines and new algorithms. In this position paper, we argue that this stagnation is due to limitations in how we evaluate anomaly detection algorithms. In current benchmarks, a trivial algorithm that only checks for extreme values in individual features performs competitively with state-of-the-art deep learning methods, despite failing on simple cases such as anomalies within an annulus of normal point"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.15584","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.15584/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2507.15584","created_at":"2026-06-19T16:12:46.849880+00:00"},{"alias_kind":"arxiv_version","alias_value":"2507.15584v2","created_at":"2026-06-19T16:12:46.849880+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.15584","created_at":"2026-06-19T16:12:46.849880+00:00"},{"alias_kind":"pith_short_12","alias_value":"DAZZRDN5UARH","created_at":"2026-06-19T16:12:46.849880+00:00"},{"alias_kind":"pith_short_16","alias_value":"DAZZRDN5UARHGD4Z","created_at":"2026-06-19T16:12:46.849880+00:00"},{"alias_kind":"pith_short_8","alias_value":"DAZZRDN5","created_at":"2026-06-19T16:12:46.849880+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2602.09329","citing_title":"MacrOData: New Benchmarks of Thousands of Datasets for Tabular Outlier Detection","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02519","citing_title":"Evaluating Tabular Representation Learning for Network Intrusion Detection","ref_index":25,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H","json":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H.json","graph_json":"https://pith.science/api/pith-number/DAZZRDN5UARHGD4ZBMTGGJQI6H/graph.json","events_json":"https://pith.science/api/pith-number/DAZZRDN5UARHGD4ZBMTGGJQI6H/events.json","paper":"https://pith.science/paper/DAZZRDN5"},"agent_actions":{"view_html":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H","download_json":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H.json","view_paper":"https://pith.science/paper/DAZZRDN5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2507.15584&json=true","fetch_graph":"https://pith.science/api/pith-number/DAZZRDN5UARHGD4ZBMTGGJQI6H/graph.json","fetch_events":"https://pith.science/api/pith-number/DAZZRDN5UARHGD4ZBMTGGJQI6H/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H/action/storage_attestation","attest_author":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H/action/author_attestation","sign_citation":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H/action/citation_signature","submit_replication":"https://pith.science/pith/DAZZRDN5UARHGD4ZBMTGGJQI6H/action/replication_record"}},"created_at":"2026-06-19T16:12:46.849880+00:00","updated_at":"2026-06-19T16:12:46.849880+00:00"}