{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:QRNI77FCOEHXGEQGPXHOML3FLM","short_pith_number":"pith:QRNI77FC","schema_version":"1.0","canonical_sha256":"845a8ffca2710f7312067dcee62f655b225814e040b5ec2ab9e36c7e257f251e","source":{"kind":"arxiv","id":"1803.04585","version":4},"attestation_state":"computed","paper":{"title":"Categorizing Variants of Goodhart's Law","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["q-fin.GN","stat.ML"],"primary_cat":"cs.AI","authors_text":"David Manheim, Scott Garrabrant","submitted_at":"2018-03-13T01:15:39Z","abstract_excerpt":"There are several distinct failure modes for overoptimization of systems on the basis of metrics. This occurs when a metric which can be used to improve a system is used to an extent that further optimization is ineffective or harmful, and is sometimes termed Goodhart's Law. This class of failure is often poorly understood, partly because terminology for discussing them is ambiguous, and partly because discussion using this ambiguous terminology ignores distinctions between different failure modes of this general type. This paper expands on an earlier discussion by Garrabrant, which notes ther"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.04585","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2018-03-13T01:15:39Z","cross_cats_sorted":["q-fin.GN","stat.ML"],"title_canon_sha256":"a606eaf08141b3f62dce14ad2bc40accbf013a12fbb3830a62bad4a862d6c7e9","abstract_canon_sha256":"0a93593125bf6359c26c0ca2cadfc763a797eac551910c55616abefb7c432aa5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:53.316849Z","signature_b64":"ilQ0clA5fzWfAa4fRkaxo1c4qGyC4Y2vnIuCt9BkWYZ3IW8x/nxRQHCiUhmToNgo1Igf7/x8gSry+B/lcwReAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"845a8ffca2710f7312067dcee62f655b225814e040b5ec2ab9e36c7e257f251e","last_reissued_at":"2026-05-17T23:52:53.316046Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:53.316046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Categorizing Variants of Goodhart's Law","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["q-fin.GN","stat.ML"],"primary_cat":"cs.AI","authors_text":"David Manheim, Scott Garrabrant","submitted_at":"2018-03-13T01:15:39Z","abstract_excerpt":"There are several distinct failure modes for overoptimization of systems on the basis of metrics. This occurs when a metric which can be used to improve a system is used to an extent that further optimization is ineffective or harmful, and is sometimes termed Goodhart's Law. This class of failure is often poorly understood, partly because terminology for discussing them is ambiguous, and partly because discussion using this ambiguous terminology ignores distinctions between different failure modes of this general type. This paper expands on an earlier discussion by Garrabrant, which notes ther"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.04585","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.04585","created_at":"2026-05-17T23:52:53.316183+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.04585v4","created_at":"2026-05-17T23:52:53.316183+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.04585","created_at":"2026-05-17T23:52:53.316183+00:00"},{"alias_kind":"pith_short_12","alias_value":"QRNI77FCOEHX","created_at":"2026-05-18T12:32:46.962924+00:00"},{"alias_kind":"pith_short_16","alias_value":"QRNI77FCOEHXGEQG","created_at":"2026-05-18T12:32:46.962924+00:00"},{"alias_kind":"pith_short_8","alias_value":"QRNI77FC","created_at":"2026-05-18T12:32:46.962924+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":17,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"2502.11981","citing_title":"Welfare as a Guiding Principle for Machine Learning -- From Compass, to Lens, to Roadmap","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22612","citing_title":"Healthcare LLM Benchmarks Are Only as Good as Their Explicit Assumptions","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21384","citing_title":"SpecBench: Measuring Reward Hacking in Long-Horizon Coding Agents","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2512.03048","citing_title":"The Specification Trap: Why Static Value Alignment Alone Is Insufficient for Robust Alignment","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2602.13934","citing_title":"Why Code, Why Now: An Information-Theoretic Perspective on the Limits of Machine Learning","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"1906.01820","citing_title":"Risks from Learned Optimization in Advanced Machine Learning Systems","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14407","citing_title":"Metis AI: The Overlooked Middle Zone Between AI-Native and World-Movers","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11496","citing_title":"The Evaluation Differential: When Frontier AI Models Recognise They Are Being Tested","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06390","citing_title":"Automated alignment is harder than you think","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07728","citing_title":"SARC: A Governance-by-Architecture Framework for Agentic AI Systems","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07671","citing_title":"The Endogeneity of Miscalibration: Impossibility and Escape in Scored Reporting","ref_index":43,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07709","citing_title":"IatroBench: Pre-Registered Evidence of Iatrogenic Harm from AI Safety Measures","ref_index":21,"is_internal_anchor":false},{"citing_arxiv_id":"2604.05274","citing_title":"Simulating the Evolution of Alignment and Values in Machine Intelligence","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2604.16516","citing_title":"Operationalizing Fairness in Text-to-Image Models: A Survey of Bias, Fairness Audits and Mitigation Strategies","ref_index":24,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17769","citing_title":"Reverse Constitutional AI: A Framework for Controllable Toxic Data Generation via Probability-Clamped RLAIF","ref_index":56,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17989","citing_title":"AIT Academy: Cultivating the Complete Agent with a Confucian Three-Domain Curriculum","ref_index":25,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15596","citing_title":"Privacy, Prediction, and Allocation","ref_index":50,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM","json":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM.json","graph_json":"https://pith.science/api/pith-number/QRNI77FCOEHXGEQGPXHOML3FLM/graph.json","events_json":"https://pith.science/api/pith-number/QRNI77FCOEHXGEQGPXHOML3FLM/events.json","paper":"https://pith.science/paper/QRNI77FC"},"agent_actions":{"view_html":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM","download_json":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM.json","view_paper":"https://pith.science/paper/QRNI77FC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.04585&json=true","fetch_graph":"https://pith.science/api/pith-number/QRNI77FCOEHXGEQGPXHOML3FLM/graph.json","fetch_events":"https://pith.science/api/pith-number/QRNI77FCOEHXGEQGPXHOML3FLM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM/action/storage_attestation","attest_author":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM/action/author_attestation","sign_citation":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM/action/citation_signature","submit_replication":"https://pith.science/pith/QRNI77FCOEHXGEQGPXHOML3FLM/action/replication_record"}},"created_at":"2026-05-17T23:52:53.316183+00:00","updated_at":"2026-05-17T23:52:53.316183+00:00"}