{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:POYGVIC7VVS4UPUEY5625GOWFH","short_pith_number":"pith:POYGVIC7","schema_version":"1.0","canonical_sha256":"7bb06aa05fad65ca3e84c77dae99d629db6eebbb41a60d4a791583c23acb60c0","source":{"kind":"arxiv","id":"2605.20241","version":1},"attestation_state":"computed","paper":{"title":"Geometry-Lite: Interpretable Safety Probing via Layer-Wise Margin Geometry","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Woo Seob Sim, Yu Rang Park","submitted_at":"2026-05-18T00:12:24Z","abstract_excerpt":"Prompt-level safety probes for large language models use hidden-state representations to separate safe from unsafe prompts, but strong average detection performance does not explain the geometry of this separation. In particular, it remains unclear how safety evidence is formed across layers, which aspects of that layer-wise geometry support low-false-positive decisions, and which geometric biases remain stable under benchmark shift. We study this as an empirical decomposition problem and introduce Geometry-Lite, a compact prompt-level probe that maps each layer's final prompt-token representa"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.20241","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T00:12:24Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"6b0f64d410072277f944562e55670b89fec434343a8ca93ad4df925f4c63e459","abstract_canon_sha256":"2f31cde3777c28e2cfc2adb67c251bc3477af14188507fc30f0136ab54a9a9db"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T00:04:22.319002Z","signature_b64":"kdw+qPMq1L6jdaSKL0c7yMizi+tdHu+bAP1z0IJexWz5fSKII6gpQkK1i9wv2NF70q52I23KN/AF17hHF7YJCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7bb06aa05fad65ca3e84c77dae99d629db6eebbb41a60d4a791583c23acb60c0","last_reissued_at":"2026-05-21T00:04:22.318173Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T00:04:22.318173Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Geometry-Lite: Interpretable Safety Probing via Layer-Wise Margin Geometry","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Woo Seob Sim, Yu Rang Park","submitted_at":"2026-05-18T00:12:24Z","abstract_excerpt":"Prompt-level safety probes for large language models use hidden-state representations to separate safe from unsafe prompts, but strong average detection performance does not explain the geometry of this separation. In particular, it remains unclear how safety evidence is formed across layers, which aspects of that layer-wise geometry support low-false-positive decisions, and which geometric biases remain stable under benchmark shift. We study this as an empirical decomposition problem and introduce Geometry-Lite, a compact prompt-level probe that maps each layer's final prompt-token representa"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20241","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20241/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.20241","created_at":"2026-05-21T00:04:22.318301+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.20241v1","created_at":"2026-05-21T00:04:22.318301+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20241","created_at":"2026-05-21T00:04:22.318301+00:00"},{"alias_kind":"pith_short_12","alias_value":"POYGVIC7VVS4","created_at":"2026-05-21T00:04:22.318301+00:00"},{"alias_kind":"pith_short_16","alias_value":"POYGVIC7VVS4UPUE","created_at":"2026-05-21T00:04:22.318301+00:00"},{"alias_kind":"pith_short_8","alias_value":"POYGVIC7","created_at":"2026-05-21T00:04:22.318301+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH","json":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH.json","graph_json":"https://pith.science/api/pith-number/POYGVIC7VVS4UPUEY5625GOWFH/graph.json","events_json":"https://pith.science/api/pith-number/POYGVIC7VVS4UPUEY5625GOWFH/events.json","paper":"https://pith.science/paper/POYGVIC7"},"agent_actions":{"view_html":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH","download_json":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH.json","view_paper":"https://pith.science/paper/POYGVIC7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.20241&json=true","fetch_graph":"https://pith.science/api/pith-number/POYGVIC7VVS4UPUEY5625GOWFH/graph.json","fetch_events":"https://pith.science/api/pith-number/POYGVIC7VVS4UPUEY5625GOWFH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH/action/storage_attestation","attest_author":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH/action/author_attestation","sign_citation":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH/action/citation_signature","submit_replication":"https://pith.science/pith/POYGVIC7VVS4UPUEY5625GOWFH/action/replication_record"}},"created_at":"2026-05-21T00:04:22.318301+00:00","updated_at":"2026-05-21T00:04:22.318301+00:00"}