{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:W4JUOTEBQR4LHRQVVCL4KPFK5J","short_pith_number":"pith:W4JUOTEB","canonical_record":{"source":{"id":"2605.21404","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T17:02:36Z","cross_cats_sorted":[],"title_canon_sha256":"247924792d244039e5951ebaa57ea56a577a5df388d369fcc675e6c108fb90de","abstract_canon_sha256":"4eda7461ed9be647286bc3c5ef901f599da4bc63a4e5a927499f4ffbafea56ab"},"schema_version":"1.0"},"canonical_sha256":"b713474c818478b3c615a897c53caaea55ef3606cb3d7efc95fa6a0ab9e59144","source":{"kind":"arxiv","id":"2605.21404","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21404","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21404v1","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21404","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_12","alias_value":"W4JUOTEBQR4L","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_16","alias_value":"W4JUOTEBQR4LHRQV","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_8","alias_value":"W4JUOTEB","created_at":"2026-05-21T02:05:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:W4JUOTEBQR4LHRQVVCL4KPFK5J","target":"record","payload":{"canonical_record":{"source":{"id":"2605.21404","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T17:02:36Z","cross_cats_sorted":[],"title_canon_sha256":"247924792d244039e5951ebaa57ea56a577a5df388d369fcc675e6c108fb90de","abstract_canon_sha256":"4eda7461ed9be647286bc3c5ef901f599da4bc63a4e5a927499f4ffbafea56ab"},"schema_version":"1.0"},"canonical_sha256":"b713474c818478b3c615a897c53caaea55ef3606cb3d7efc95fa6a0ab9e59144","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T02:05:33.090105Z","signature_b64":"X+wQLjHfHpT5NxpwNAw3Ig9pK+1cy2SiOnNbiAv4MRg4n++nCjcudcqzh7H8yMZDjAfkgX4ZsKAvHIweHINtAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b713474c818478b3c615a897c53caaea55ef3606cb3d7efc95fa6a0ab9e59144","last_reissued_at":"2026-05-21T02:05:33.089608Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T02:05:33.089608Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.21404","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T02:05:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hIzISa8G3ZhlmHewjLcXB215qAxbIG5jk7Wih4BBwKEv3DQalGb6/a+un4txKO9MDcP8GY3XXerNbNjZJrKHCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T03:07:24.012213Z"},"content_sha256":"bee8b6ca083ade36406f7326244404c0f06282dbd041975f9b9c8899212149ed","schema_version":"1.0","event_id":"sha256:bee8b6ca083ade36406f7326244404c0f06282dbd041975f9b9c8899212149ed"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:W4JUOTEBQR4LHRQVVCL4KPFK5J","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"What Twelve LLM Agent Benchmark Papers Disclose About Themselves: A Pilot Audit and an Open Scoring Schema","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Faezeh Ghaderi (University of Texas at Arlington), Mahdi Naser Moghadasi (BrightMind AI, Texas Tech University)","submitted_at":"2026-05-20T17:02:36Z","abstract_excerpt":"We read twelve well-known LLM agent benchmark papers and recorded, dimension by dimension, what each paper actually says about how its evaluation was run. The motivation came from a familiar frustration: two papers will report results on the same benchmark with the same model name and disagree, and you cannot tell why -- the scaffold, the sampling settings, the subset, or the evaluator version. In many cases the published artifact does not let you answer. This paper is an implementation report on the attempt. We designed a small audit schema (five fields: benchmark identity, harness specificat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21404","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21404/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T02:05:33Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iZNivR1Qlasmqw4gHv3ORsM3pqfI6XgMqt//LcbImFFAsBc8SDX/P7NRyfl5gfwYJVQtSDjp7gcjPmbtMgNDBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T03:07:24.012921Z"},"content_sha256":"7d84e602dac60da2dcdc676b9c74b961302c1984a5efa105fac15525075f9530","schema_version":"1.0","event_id":"sha256:7d84e602dac60da2dcdc676b9c74b961302c1984a5efa105fac15525075f9530"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/bundle.json","state_url":"https://pith.science/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T03:07:24Z","links":{"resolver":"https://pith.science/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J","bundle":"https://pith.science/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/bundle.json","state":"https://pith.science/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/state.json","well_known_bundle":"https://pith.science/.well-known/pith/W4JUOTEBQR4LHRQVVCL4KPFK5J/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:W4JUOTEBQR4LHRQVVCL4KPFK5J","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4eda7461ed9be647286bc3c5ef901f599da4bc63a4e5a927499f4ffbafea56ab","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T17:02:36Z","title_canon_sha256":"247924792d244039e5951ebaa57ea56a577a5df388d369fcc675e6c108fb90de"},"schema_version":"1.0","source":{"id":"2605.21404","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21404","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21404v1","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21404","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_12","alias_value":"W4JUOTEBQR4L","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_16","alias_value":"W4JUOTEBQR4LHRQV","created_at":"2026-05-21T02:05:33Z"},{"alias_kind":"pith_short_8","alias_value":"W4JUOTEB","created_at":"2026-05-21T02:05:33Z"}],"graph_snapshots":[{"event_id":"sha256:7d84e602dac60da2dcdc676b9c74b961302c1984a5efa105fac15525075f9530","target":"graph","created_at":"2026-05-21T02:05:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.21404/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We read twelve well-known LLM agent benchmark papers and recorded, dimension by dimension, what each paper actually says about how its evaluation was run. The motivation came from a familiar frustration: two papers will report results on the same benchmark with the same model name and disagree, and you cannot tell why -- the scaffold, the sampling settings, the subset, or the evaluator version. In many cases the published artifact does not let you answer. This paper is an implementation report on the attempt. We designed a small audit schema (five fields: benchmark identity, harness specificat","authors_text":"Faezeh Ghaderi (University of Texas at Arlington), Mahdi Naser Moghadasi (BrightMind AI, Texas Tech University)","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T17:02:36Z","title":"What Twelve LLM Agent Benchmark Papers Disclose About Themselves: A Pilot Audit and an Open Scoring Schema"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21404","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:bee8b6ca083ade36406f7326244404c0f06282dbd041975f9b9c8899212149ed","target":"record","created_at":"2026-05-21T02:05:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4eda7461ed9be647286bc3c5ef901f599da4bc63a4e5a927499f4ffbafea56ab","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T17:02:36Z","title_canon_sha256":"247924792d244039e5951ebaa57ea56a577a5df388d369fcc675e6c108fb90de"},"schema_version":"1.0","source":{"id":"2605.21404","kind":"arxiv","version":1}},"canonical_sha256":"b713474c818478b3c615a897c53caaea55ef3606cb3d7efc95fa6a0ab9e59144","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b713474c818478b3c615a897c53caaea55ef3606cb3d7efc95fa6a0ab9e59144","first_computed_at":"2026-05-21T02:05:33.089608Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T02:05:33.089608Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"X+wQLjHfHpT5NxpwNAw3Ig9pK+1cy2SiOnNbiAv4MRg4n++nCjcudcqzh7H8yMZDjAfkgX4ZsKAvHIweHINtAA==","signature_status":"signed_v1","signed_at":"2026-05-21T02:05:33.090105Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.21404","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:bee8b6ca083ade36406f7326244404c0f06282dbd041975f9b9c8899212149ed","sha256:7d84e602dac60da2dcdc676b9c74b961302c1984a5efa105fac15525075f9530"],"state_sha256":"425ffaf6af985f697c8856230c2ddbc6a95972e5fab5238d34bf23f7d4ea8f27"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wuJPt9RfkSJXPNpcefGtCfsYIFw57tEQyeQ1KGNjuq37QgxIK6xM5qQCBqvNmxICxFe8tF6nM1aWohIoQHcuBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T03:07:24.016467Z","bundle_sha256":"c6f900f392d707adf5fcf0627784dd14e451f7876482f03b7407539ddf5b3674"}}