{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:2U7RVSTIX3OCS7G3XDRD66NW7A","short_pith_number":"pith:2U7RVSTI","canonical_record":{"source":{"id":"2605.03217","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-04T23:12:32Z","cross_cats_sorted":["cs.CY"],"title_canon_sha256":"3052527ba04e593aad9f68352d09a30bfa650a0a3c9eeac963323c24ad3cc7ca","abstract_canon_sha256":"f551c6ae015815f3ec2a903e619a098b646a1c74c3f608a740f3bd91a991dc95"},"schema_version":"1.0"},"canonical_sha256":"d53f1aca68bedc297cdbb8e23f79b6f83fe8f3ff51a5e6cfbeb572dcbe7c6078","source":{"kind":"arxiv","id":"2605.03217","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.03217","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.03217v2","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.03217","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_12","alias_value":"2U7RVSTIX3OC","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_16","alias_value":"2U7RVSTIX3OCS7G3","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_8","alias_value":"2U7RVSTI","created_at":"2026-06-05T01:14:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:2U7RVSTIX3OCS7G3XDRD66NW7A","target":"record","payload":{"canonical_record":{"source":{"id":"2605.03217","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-04T23:12:32Z","cross_cats_sorted":["cs.CY"],"title_canon_sha256":"3052527ba04e593aad9f68352d09a30bfa650a0a3c9eeac963323c24ad3cc7ca","abstract_canon_sha256":"f551c6ae015815f3ec2a903e619a098b646a1c74c3f608a740f3bd91a991dc95"},"schema_version":"1.0"},"canonical_sha256":"d53f1aca68bedc297cdbb8e23f79b6f83fe8f3ff51a5e6cfbeb572dcbe7c6078","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:14:39.973087Z","signature_b64":"ULw6ELs+PkJFNQjsfUpxY0bx63RKZr7Dwe8DHO20XyW+z99CsITpD/ckAEiBUY1Ukof9AUXhFuIs/j1iUm9vDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d53f1aca68bedc297cdbb8e23f79b6f83fe8f3ff51a5e6cfbeb572dcbe7c6078","last_reissued_at":"2026-06-05T01:14:39.972307Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:14:39.972307Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.03217","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:14:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QnEtsX4s2d2+aIcJqfSKOZ+/9xMUpE4syNrJ74PZSJioj8/7BKA6B4581LnoavKqgJRWLNb0+7Taui3TMMsYAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:43:00.560028Z"},"content_sha256":"95b7c4225fa5b40c254a78050f041afddf775a1f6acb74e1e196f26388da67c7","schema_version":"1.0","event_id":"sha256:95b7c4225fa5b40c254a78050f041afddf775a1f6acb74e1e196f26388da67c7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:2U7RVSTIX3OCS7G3XDRD66NW7A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Moral Sensitivity in LLMs: A Tiered Evaluation of Contextual Bias via Behavioral Profiling and Mechanistic Interpretability","license":"http://creativecommons.org/licenses/by/4.0/","headline":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale.","cross_cats":["cs.CY"],"primary_cat":"cs.LG","authors_text":"Aman Chadha, Atmika Gorti, Krishnaprasad Thirunarayan, Manas Gaur, Vinija Jain, Yash Aggarwal","submitted_at":"2026-05-04T23:12:32Z","abstract_excerpt":"Large language models (LLMs) are increasingly deployed in settings that require nuanced ethical reasoning, yet existing bias evaluations treat model outputs as simply \"biased\" or \"unbiased.\" This binary framing misses the gradual, context-sensitive way bias actually emerges. We address this gap in two stages: behavioral profiling and mechanistic validation. In the behavioral stage, we introduce the Moral Sensitivity Index (MSI), a metric that quantifies the probability of biased output across a graduated, seven-tier stress test ranging from abstract numerical problems to scenarios rooted in hi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Circuit-level analysis reveals a U-curve of bias: SLMs exhibit strong criminal bias; scaling to instruction-tuned models eliminates it; reasoning distillation reintroduces bias to SLM-like levels despite identical parameter counts, suggesting distillation compresses reasoning traces in ways that reactivate shallow statistical associations.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the chosen criminal-bias scenarios and interpretability probes (logit lens, attention analysis, activation patching, semantic probing) isolate bias circuits without confounding from prompt wording, model scale, or other unmeasured factors.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"LLMs exhibit context-sensitive moral bias with model-specific patterns; mechanistic analysis shows a U-curve in which instruction tuning removes bias but reasoning distillation reintroduces it despite unchanged size.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"aedd85c18b50412dfcddcda5e5b54b6dabfef6dbe88a942beea39894c1da5471"},"source":{"id":"2605.03217","kind":"arxiv","version":2},"verdict":{"id":"35e2322a-9d1b-4823-804d-218253b66d43","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-08T18:29:46.979513Z","strongest_claim":"Circuit-level analysis reveals a U-curve of bias: SLMs exhibit strong criminal bias; scaling to instruction-tuned models eliminates it; reasoning distillation reintroduces bias to SLM-like levels despite identical parameter counts, suggesting distillation compresses reasoning traces in ways that reactivate shallow statistical associations.","one_line_summary":"LLMs exhibit context-sensitive moral bias with model-specific patterns; mechanistic analysis shows a U-curve in which instruction tuning removes bias but reasoning distillation reintroduces it despite unchanged size.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the chosen criminal-bias scenarios and interpretability probes (logit lens, attention analysis, activation patching, semantic probing) isolate bias circuits without confounding from prompt wording, model scale, or other unmeasured factors.","pith_extraction_headline":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.03217/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-20T14:35:29.028682Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-20T01:31:21.837189Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T15:34:29.377334Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"032364ba77ef6a71c7b3a89d6fd302b07dc1e1b0bb6ae1d9bc89a903d75930d4"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"6d6f28c791d0532dd819fef4d42b9454db36a03847461e409080becef0427e4c"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"35e2322a-9d1b-4823-804d-218253b66d43"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:14:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ibt42cbPMDpegCtCd+aNJLi/q8D0vrUnbDDmCqhTDoVS+7TwqrhH936/uwYMpvfnLFYCpuUjmok0NlyvIqCUBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:43:00.560937Z"},"content_sha256":"74aecf456330e63c1f23e9512c63cd7383b26e9dce9e925a11db7d54cc3c9bd9","schema_version":"1.0","event_id":"sha256:74aecf456330e63c1f23e9512c63cd7383b26e9dce9e925a11db7d54cc3c9bd9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/bundle.json","state_url":"https://pith.science/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T22:43:00Z","links":{"resolver":"https://pith.science/pith/2U7RVSTIX3OCS7G3XDRD66NW7A","bundle":"https://pith.science/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/bundle.json","state":"https://pith.science/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2U7RVSTIX3OCS7G3XDRD66NW7A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:2U7RVSTIX3OCS7G3XDRD66NW7A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f551c6ae015815f3ec2a903e619a098b646a1c74c3f608a740f3bd91a991dc95","cross_cats_sorted":["cs.CY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-04T23:12:32Z","title_canon_sha256":"3052527ba04e593aad9f68352d09a30bfa650a0a3c9eeac963323c24ad3cc7ca"},"schema_version":"1.0","source":{"id":"2605.03217","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.03217","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.03217v2","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.03217","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_12","alias_value":"2U7RVSTIX3OC","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_16","alias_value":"2U7RVSTIX3OCS7G3","created_at":"2026-06-05T01:14:39Z"},{"alias_kind":"pith_short_8","alias_value":"2U7RVSTI","created_at":"2026-06-05T01:14:39Z"}],"graph_snapshots":[{"event_id":"sha256:74aecf456330e63c1f23e9512c63cd7383b26e9dce9e925a11db7d54cc3c9bd9","target":"graph","created_at":"2026-06-05T01:14:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Circuit-level analysis reveals a U-curve of bias: SLMs exhibit strong criminal bias; scaling to instruction-tuned models eliminates it; reasoning distillation reintroduces bias to SLM-like levels despite identical parameter counts, suggesting distillation compresses reasoning traces in ways that reactivate shallow statistical associations."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the chosen criminal-bias scenarios and interpretability probes (logit lens, attention analysis, activation patching, semantic probing) isolate bias circuits without confounding from prompt wording, model scale, or other unmeasured factors."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"LLMs exhibit context-sensitive moral bias with model-specific patterns; mechanistic analysis shows a U-curve in which instruction tuning removes bias but reasoning distillation reintroduces it despite unchanged size."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale."}],"snapshot_sha256":"aedd85c18b50412dfcddcda5e5b54b6dabfef6dbe88a942beea39894c1da5471"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"6d6f28c791d0532dd819fef4d42b9454db36a03847461e409080becef0427e4c"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-20T14:35:29.028682Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-20T01:31:21.837189Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T15:34:29.377334Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.03217/integrity.json","findings":[],"snapshot_sha256":"032364ba77ef6a71c7b3a89d6fd302b07dc1e1b0bb6ae1d9bc89a903d75930d4","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models (LLMs) are increasingly deployed in settings that require nuanced ethical reasoning, yet existing bias evaluations treat model outputs as simply \"biased\" or \"unbiased.\" This binary framing misses the gradual, context-sensitive way bias actually emerges. We address this gap in two stages: behavioral profiling and mechanistic validation. In the behavioral stage, we introduce the Moral Sensitivity Index (MSI), a metric that quantifies the probability of biased output across a graduated, seven-tier stress test ranging from abstract numerical problems to scenarios rooted in hi","authors_text":"Aman Chadha, Atmika Gorti, Krishnaprasad Thirunarayan, Manas Gaur, Vinija Jain, Yash Aggarwal","cross_cats":["cs.CY"],"headline":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-04T23:12:32Z","title":"Moral Sensitivity in LLMs: A Tiered Evaluation of Contextual Bias via Behavioral Profiling and Mechanistic Interpretability"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.03217","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-08T18:29:46.979513Z","id":"35e2322a-9d1b-4823-804d-218253b66d43","model_set":{"reader":"grok-4.3"},"one_line_summary":"LLMs exhibit context-sensitive moral bias with model-specific patterns; mechanistic analysis shows a U-curve in which instruction tuning removes bias but reasoning distillation reintroduces it despite unchanged size.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"LLMs follow a U-curve in criminal bias: strong in small models, removed by instruction tuning, and restored by reasoning distillation at the same scale.","strongest_claim":"Circuit-level analysis reveals a U-curve of bias: SLMs exhibit strong criminal bias; scaling to instruction-tuned models eliminates it; reasoning distillation reintroduces bias to SLM-like levels despite identical parameter counts, suggesting distillation compresses reasoning traces in ways that reactivate shallow statistical associations.","weakest_assumption":"That the chosen criminal-bias scenarios and interpretability probes (logit lens, attention analysis, activation patching, semantic probing) isolate bias circuits without confounding from prompt wording, model scale, or other unmeasured factors."}},"verdict_id":"35e2322a-9d1b-4823-804d-218253b66d43"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:95b7c4225fa5b40c254a78050f041afddf775a1f6acb74e1e196f26388da67c7","target":"record","created_at":"2026-06-05T01:14:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f551c6ae015815f3ec2a903e619a098b646a1c74c3f608a740f3bd91a991dc95","cross_cats_sorted":["cs.CY"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-04T23:12:32Z","title_canon_sha256":"3052527ba04e593aad9f68352d09a30bfa650a0a3c9eeac963323c24ad3cc7ca"},"schema_version":"1.0","source":{"id":"2605.03217","kind":"arxiv","version":2}},"canonical_sha256":"d53f1aca68bedc297cdbb8e23f79b6f83fe8f3ff51a5e6cfbeb572dcbe7c6078","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d53f1aca68bedc297cdbb8e23f79b6f83fe8f3ff51a5e6cfbeb572dcbe7c6078","first_computed_at":"2026-06-05T01:14:39.972307Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:14:39.972307Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ULw6ELs+PkJFNQjsfUpxY0bx63RKZr7Dwe8DHO20XyW+z99CsITpD/ckAEiBUY1Ukof9AUXhFuIs/j1iUm9vDg==","signature_status":"signed_v1","signed_at":"2026-06-05T01:14:39.973087Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.03217","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:95b7c4225fa5b40c254a78050f041afddf775a1f6acb74e1e196f26388da67c7","sha256:74aecf456330e63c1f23e9512c63cd7383b26e9dce9e925a11db7d54cc3c9bd9"],"state_sha256":"36cdc7a457726d1e9e1bd4bc00c55bf0038e98e1de474d1e7652aee17ee6c8f9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Uo7YaCCiHcnFmuHRAi6/Otb5+BSK6oHX0bI9E1H7ZFFPBrSds8+KWzV4uOQjHJ6c7va3qZffMAOom2J2vliADw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T22:43:00.564985Z","bundle_sha256":"608540f11eb11b4abbc0a92df8f41c7df7e784fc5a725225bec57c0142ca8e30"}}