{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:IGHN3KQ277K6CZ6FYGTXF4GAZS","short_pith_number":"pith:IGHN3KQ2","schema_version":"1.0","canonical_sha256":"418eddaa1affd5e167c5c1a772f0c0cc8c0bb862612d6e3581386969140a20e0","source":{"kind":"arxiv","id":"2605.25739","version":1},"attestation_state":"computed","paper":{"title":"The Behavioral Credibility Trilemma: When Calibrated Autonomy Becomes Impossible","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT","stat.ML"],"primary_cat":"cs.LG","authors_text":"Dinesh Kumar Sah, Hassan Mehmood, Lauri Lov\\'en, Nam Do, Sasu Tarkoma","submitted_at":"2026-05-25T11:51:08Z","abstract_excerpt":"We prove that no reinforcement learning policy with confidence-gated autonomy can simultaneously achieve maximum helpfulness, optimal calibration, and full autonomy under rational oversight, whenever some tasks exceed the agent's reliable competence: the Behavioral Credibility Trilemma. The impossibility is geometric -- adding any non-affine autonomy incentive to a strictly proper scoring rule destroys strict properness, so an agent rewarded for both calibrated confidence and autonomous action systematically inflates its reported confidence on tasks below the principal's approval threshold. Th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.25739","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-25T11:51:08Z","cross_cats_sorted":["cs.GT","stat.ML"],"title_canon_sha256":"d3011518fa48bb13bf1677bede29ccec89573886e9368f7a4271d9c2f729ac33","abstract_canon_sha256":"280d4712b0aa9fe876f1da83050702704c03237e2cd6955620030f747cd9cbfe"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:04:52.787134Z","signature_b64":"+q4UXphATSkO+DcLJvouwgCHgYRSRo1fac/u6w1YELLZotBtdSlz6AfX1bfjHycTmpNIxxleDXK3BeS1hD3UBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"418eddaa1affd5e167c5c1a772f0c0cc8c0bb862612d6e3581386969140a20e0","last_reissued_at":"2026-05-26T02:04:52.786561Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:04:52.786561Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Behavioral Credibility Trilemma: When Calibrated Autonomy Becomes Impossible","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT","stat.ML"],"primary_cat":"cs.LG","authors_text":"Dinesh Kumar Sah, Hassan Mehmood, Lauri Lov\\'en, Nam Do, Sasu Tarkoma","submitted_at":"2026-05-25T11:51:08Z","abstract_excerpt":"We prove that no reinforcement learning policy with confidence-gated autonomy can simultaneously achieve maximum helpfulness, optimal calibration, and full autonomy under rational oversight, whenever some tasks exceed the agent's reliable competence: the Behavioral Credibility Trilemma. The impossibility is geometric -- adding any non-affine autonomy incentive to a strictly proper scoring rule destroys strict properness, so an agent rewarded for both calibrated confidence and autonomous action systematically inflates its reported confidence on tasks below the principal's approval threshold. Th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25739","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.25739/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.25739","created_at":"2026-05-26T02:04:52.786645+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.25739v1","created_at":"2026-05-26T02:04:52.786645+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25739","created_at":"2026-05-26T02:04:52.786645+00:00"},{"alias_kind":"pith_short_12","alias_value":"IGHN3KQ277K6","created_at":"2026-05-26T02:04:52.786645+00:00"},{"alias_kind":"pith_short_16","alias_value":"IGHN3KQ277K6CZ6F","created_at":"2026-05-26T02:04:52.786645+00:00"},{"alias_kind":"pith_short_8","alias_value":"IGHN3KQ2","created_at":"2026-05-26T02:04:52.786645+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS","json":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS.json","graph_json":"https://pith.science/api/pith-number/IGHN3KQ277K6CZ6FYGTXF4GAZS/graph.json","events_json":"https://pith.science/api/pith-number/IGHN3KQ277K6CZ6FYGTXF4GAZS/events.json","paper":"https://pith.science/paper/IGHN3KQ2"},"agent_actions":{"view_html":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS","download_json":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS.json","view_paper":"https://pith.science/paper/IGHN3KQ2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.25739&json=true","fetch_graph":"https://pith.science/api/pith-number/IGHN3KQ277K6CZ6FYGTXF4GAZS/graph.json","fetch_events":"https://pith.science/api/pith-number/IGHN3KQ277K6CZ6FYGTXF4GAZS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS/action/storage_attestation","attest_author":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS/action/author_attestation","sign_citation":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS/action/citation_signature","submit_replication":"https://pith.science/pith/IGHN3KQ277K6CZ6FYGTXF4GAZS/action/replication_record"}},"created_at":"2026-05-26T02:04:52.786645+00:00","updated_at":"2026-05-26T02:04:52.786645+00:00"}