{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TVQUCMNG3TAP6UQQ3ZP5INE4ME","short_pith_number":"pith:TVQUCMNG","schema_version":"1.0","canonical_sha256":"9d614131a6dcc0ff5210de5fd4349c6116431904fb2eeea0b465f18059f75cac","source":{"kind":"arxiv","id":"2606.26155","version":1},"attestation_state":"computed","paper":{"title":"Detecting and Controlling Sycophancy with Cascading Linear Features","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chris Bregler, Maty Bohacek, Nicholas Dufour, Rishub Jain, Roma Patel, Thomas Leung","submitted_at":"2026-06-23T20:10:53Z","abstract_excerpt":"Interpreting and controlling model behaviors through activation steering methods requires many pairs of contrastive samples that clearly exhibit desired or undesired behavior. These data pairs determine the degree to which interpretability frameworks can reliably detect model features responsible for a behavior, and therefore the ability to steer models toward or away from such behavior. In this work, we present an iterative data generation pipeline that isolates cascading linear features responsible for a behavior. Specifically, we show how moving beyond simple binary pairs of samples, and in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.26155","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-23T20:10:53Z","cross_cats_sorted":[],"title_canon_sha256":"518aaa0dc732829f6a8b0ddfc45413001f6f87296af33075d3bb669dc767bc26","abstract_canon_sha256":"fa35162ecc407015117ae44256f66481cda3a3168f39b2ae5a9ec1943c229041"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-26T00:15:27.442672Z","signature_b64":"dWiGLwt2jmFrkkQcYtXiHBChMe2GQPIzNLXeBxVnM4X4twND7GDNniSVatcX2lj7g6hosoUgttbOHxYuKr/tAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9d614131a6dcc0ff5210de5fd4349c6116431904fb2eeea0b465f18059f75cac","last_reissued_at":"2026-06-26T00:15:27.442135Z","signature_status":"signed_v1","first_computed_at":"2026-06-26T00:15:27.442135Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Detecting and Controlling Sycophancy with Cascading Linear Features","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chris Bregler, Maty Bohacek, Nicholas Dufour, Rishub Jain, Roma Patel, Thomas Leung","submitted_at":"2026-06-23T20:10:53Z","abstract_excerpt":"Interpreting and controlling model behaviors through activation steering methods requires many pairs of contrastive samples that clearly exhibit desired or undesired behavior. These data pairs determine the degree to which interpretability frameworks can reliably detect model features responsible for a behavior, and therefore the ability to steer models toward or away from such behavior. In this work, we present an iterative data generation pipeline that isolates cascading linear features responsible for a behavior. Specifically, we show how moving beyond simple binary pairs of samples, and in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.26155","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.26155/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.26155","created_at":"2026-06-26T00:15:27.442202+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.26155v1","created_at":"2026-06-26T00:15:27.442202+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.26155","created_at":"2026-06-26T00:15:27.442202+00:00"},{"alias_kind":"pith_short_12","alias_value":"TVQUCMNG3TAP","created_at":"2026-06-26T00:15:27.442202+00:00"},{"alias_kind":"pith_short_16","alias_value":"TVQUCMNG3TAP6UQQ","created_at":"2026-06-26T00:15:27.442202+00:00"},{"alias_kind":"pith_short_8","alias_value":"TVQUCMNG","created_at":"2026-06-26T00:15:27.442202+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME","json":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME.json","graph_json":"https://pith.science/api/pith-number/TVQUCMNG3TAP6UQQ3ZP5INE4ME/graph.json","events_json":"https://pith.science/api/pith-number/TVQUCMNG3TAP6UQQ3ZP5INE4ME/events.json","paper":"https://pith.science/paper/TVQUCMNG"},"agent_actions":{"view_html":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME","download_json":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME.json","view_paper":"https://pith.science/paper/TVQUCMNG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.26155&json=true","fetch_graph":"https://pith.science/api/pith-number/TVQUCMNG3TAP6UQQ3ZP5INE4ME/graph.json","fetch_events":"https://pith.science/api/pith-number/TVQUCMNG3TAP6UQQ3ZP5INE4ME/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME/action/storage_attestation","attest_author":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME/action/author_attestation","sign_citation":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME/action/citation_signature","submit_replication":"https://pith.science/pith/TVQUCMNG3TAP6UQQ3ZP5INE4ME/action/replication_record"}},"created_at":"2026-06-26T00:15:27.442202+00:00","updated_at":"2026-06-26T00:15:27.442202+00:00"}