{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4C5WDQICWM6QPCNENRTJX5RBY3","short_pith_number":"pith:4C5WDQIC","canonical_record":{"source":{"id":"2606.23403","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T14:26:48Z","cross_cats_sorted":[],"title_canon_sha256":"b4ba5a2201843d274c80e716b6007a6cda15cfcfcad2d86006b5dabd90feaa2a","abstract_canon_sha256":"93691792e876ef0df1b74e3937b153b231b04b8062cd204733722d9e6c84bda4"},"schema_version":"1.0"},"canonical_sha256":"e0bb61c102b33d0789a46c669bf621c6dd3460192fbe9ce914d3b4bb1e36dbdc","source":{"kind":"arxiv","id":"2606.23403","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.23403","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"arxiv_version","alias_value":"2606.23403v1","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23403","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_12","alias_value":"4C5WDQICWM6Q","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_16","alias_value":"4C5WDQICWM6QPCNE","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_8","alias_value":"4C5WDQIC","created_at":"2026-06-23T03:14:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4C5WDQICWM6QPCNENRTJX5RBY3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.23403","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T14:26:48Z","cross_cats_sorted":[],"title_canon_sha256":"b4ba5a2201843d274c80e716b6007a6cda15cfcfcad2d86006b5dabd90feaa2a","abstract_canon_sha256":"93691792e876ef0df1b74e3937b153b231b04b8062cd204733722d9e6c84bda4"},"schema_version":"1.0"},"canonical_sha256":"e0bb61c102b33d0789a46c669bf621c6dd3460192fbe9ce914d3b4bb1e36dbdc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:19.145629Z","signature_b64":"rTHEGG7TOEBKvM7BaCQKY0XOUTFpMC1mAZTkjFUtZEneZi7lCjCPBedsTvZ7GEkfFTqznuacXR6OsadIsouiAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e0bb61c102b33d0789a46c669bf621c6dd3460192fbe9ce914d3b4bb1e36dbdc","last_reissued_at":"2026-06-23T03:14:19.145221Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:19.145221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.23403","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T03:14:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0HIYvPTL7EWhBh4KO8Q/8KoTi2YR9lRs3RPZfv+jofH6yOTLHZWz1iV9Y2vHXn6GUwWZAi2ydPI/GotSmhtHBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T20:58:13.159226Z"},"content_sha256":"3a851afff737aba4cde391253be6c12b344df2855927d8c4c5b9f9e2d0dbe92a","schema_version":"1.0","event_id":"sha256:3a851afff737aba4cde391253be6c12b344df2855927d8c4c5b9f9e2d0dbe92a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4C5WDQICWM6QPCNENRTJX5RBY3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Litmus: Zero-Label, Code-Driven Metric Specification for Evaluating AI Systems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Apoorva Sharma, Kevin Paul, Prajjwal Gupta, Prasang Gupta, Sumanth Chundru, Vishal Bhutani, Waqar Sarguroh","submitted_at":"2026-06-22T14:26:48Z","abstract_excerpt":"As agentic LLM systems move from prototypes to deployment across increasingly diverse domains, evaluating them has become both more important and more difficult. The challenge is not only that individual metrics may be unreliable, but that evaluation goals are often left implicit. Without a clear account of what a system is expected to do, how it can fail, and which failures matter, metric choices become difficult to justify, interpret, or validate. We present Litmus, a zero-label system that designs evaluation and monitoring metrics for AI pipelines by eliciting evaluation intent from source "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23403","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.23403/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T03:14:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xcnRXpgqMJZAV6VDzRDlqwbo6AzE0BO5a99tw2TnEmkUPiw1n/Cd+qi96YY7q+3F3Wt+xUDQWEbhxONJgJGZDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T20:58:13.159633Z"},"content_sha256":"96f16053fca094d5f8059402de235a85b0fe4bd0a63f0a7315ca39ad399ea17e","schema_version":"1.0","event_id":"sha256:96f16053fca094d5f8059402de235a85b0fe4bd0a63f0a7315ca39ad399ea17e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4C5WDQICWM6QPCNENRTJX5RBY3/bundle.json","state_url":"https://pith.science/pith/4C5WDQICWM6QPCNENRTJX5RBY3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4C5WDQICWM6QPCNENRTJX5RBY3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T20:58:13Z","links":{"resolver":"https://pith.science/pith/4C5WDQICWM6QPCNENRTJX5RBY3","bundle":"https://pith.science/pith/4C5WDQICWM6QPCNENRTJX5RBY3/bundle.json","state":"https://pith.science/pith/4C5WDQICWM6QPCNENRTJX5RBY3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4C5WDQICWM6QPCNENRTJX5RBY3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4C5WDQICWM6QPCNENRTJX5RBY3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"93691792e876ef0df1b74e3937b153b231b04b8062cd204733722d9e6c84bda4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T14:26:48Z","title_canon_sha256":"b4ba5a2201843d274c80e716b6007a6cda15cfcfcad2d86006b5dabd90feaa2a"},"schema_version":"1.0","source":{"id":"2606.23403","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.23403","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"arxiv_version","alias_value":"2606.23403v1","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23403","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_12","alias_value":"4C5WDQICWM6Q","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_16","alias_value":"4C5WDQICWM6QPCNE","created_at":"2026-06-23T03:14:19Z"},{"alias_kind":"pith_short_8","alias_value":"4C5WDQIC","created_at":"2026-06-23T03:14:19Z"}],"graph_snapshots":[{"event_id":"sha256:96f16053fca094d5f8059402de235a85b0fe4bd0a63f0a7315ca39ad399ea17e","target":"graph","created_at":"2026-06-23T03:14:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.23403/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As agentic LLM systems move from prototypes to deployment across increasingly diverse domains, evaluating them has become both more important and more difficult. The challenge is not only that individual metrics may be unreliable, but that evaluation goals are often left implicit. Without a clear account of what a system is expected to do, how it can fail, and which failures matter, metric choices become difficult to justify, interpret, or validate. We present Litmus, a zero-label system that designs evaluation and monitoring metrics for AI pipelines by eliciting evaluation intent from source ","authors_text":"Apoorva Sharma, Kevin Paul, Prajjwal Gupta, Prasang Gupta, Sumanth Chundru, Vishal Bhutani, Waqar Sarguroh","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T14:26:48Z","title":"Litmus: Zero-Label, Code-Driven Metric Specification for Evaluating AI Systems"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23403","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3a851afff737aba4cde391253be6c12b344df2855927d8c4c5b9f9e2d0dbe92a","target":"record","created_at":"2026-06-23T03:14:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"93691792e876ef0df1b74e3937b153b231b04b8062cd204733722d9e6c84bda4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T14:26:48Z","title_canon_sha256":"b4ba5a2201843d274c80e716b6007a6cda15cfcfcad2d86006b5dabd90feaa2a"},"schema_version":"1.0","source":{"id":"2606.23403","kind":"arxiv","version":1}},"canonical_sha256":"e0bb61c102b33d0789a46c669bf621c6dd3460192fbe9ce914d3b4bb1e36dbdc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e0bb61c102b33d0789a46c669bf621c6dd3460192fbe9ce914d3b4bb1e36dbdc","first_computed_at":"2026-06-23T03:14:19.145221Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T03:14:19.145221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"rTHEGG7TOEBKvM7BaCQKY0XOUTFpMC1mAZTkjFUtZEneZi7lCjCPBedsTvZ7GEkfFTqznuacXR6OsadIsouiAg==","signature_status":"signed_v1","signed_at":"2026-06-23T03:14:19.145629Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.23403","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3a851afff737aba4cde391253be6c12b344df2855927d8c4c5b9f9e2d0dbe92a","sha256:96f16053fca094d5f8059402de235a85b0fe4bd0a63f0a7315ca39ad399ea17e"],"state_sha256":"b5f25ff658ef9b7453d251a24d35ceb93191f5a94d095091301b94cf1b8b8b2b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nDkvkW79bJhLjbROVbTJI6iFH9OvE6WljadOqVvt/ycAEenSPICgJ76pzMTyDkycX5OVncsM7eriG3nUJgWvAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T20:58:13.161513Z","bundle_sha256":"a66ab44d5a6ce28cd4eedaed1a397a1f09617c4c6ef89cd5c9d525e00e293b6e"}}