{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:XFQWT2RUKVYSPY7QE7FGBMC6GX","short_pith_number":"pith:XFQWT2RU","schema_version":"1.0","canonical_sha256":"b96169ea34557127e3f027ca60b05e35dc6c6137f652de7e47e029323207d34d","source":{"kind":"arxiv","id":"2606.06746","version":1},"attestation_state":"computed","paper":{"title":"Performance Variation in Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"A. Rupam Mahmood, Haruto Tanaka","submitted_at":"2026-06-04T21:59:47Z","abstract_excerpt":"Deep reinforcement learning (RL) algorithms often suffer from low run-to-run robustness, manifesting as significant performance variation across independent runs of identically configured agents. Although this issue poses a spectrum of challenges across research and practice, relatively few studies develop methods to evaluate it; RL research instead often reports uncertainty in the estimated mean performance. In this paper, we outline the limitations of conventional uncertainty and variation estimates, particularly their misalignment with purpose and the risk of underreporting. We then propose"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.06746","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-04T21:59:47Z","cross_cats_sorted":[],"title_canon_sha256":"eaf52ffc352ebbcffb97668a369d7c1d6f46fc10fd7a983c6b45e2444481caca","abstract_canon_sha256":"0e70e3f2d6a2a27e5e2398f3d3789133f8ab34058d3744ec57322e02c903c4f8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:04:25.830760Z","signature_b64":"7GyPO+XiVQatTbCm8rtutYTiXpqgFhP0xt0ZBQ0uBcy7RoOjeDEw6vbyCGhJLPEkCwWcZRAZY+rQoClNGmRCCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b96169ea34557127e3f027ca60b05e35dc6c6137f652de7e47e029323207d34d","last_reissued_at":"2026-06-08T01:04:25.829947Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:04:25.829947Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Performance Variation in Deep Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"A. Rupam Mahmood, Haruto Tanaka","submitted_at":"2026-06-04T21:59:47Z","abstract_excerpt":"Deep reinforcement learning (RL) algorithms often suffer from low run-to-run robustness, manifesting as significant performance variation across independent runs of identically configured agents. Although this issue poses a spectrum of challenges across research and practice, relatively few studies develop methods to evaluate it; RL research instead often reports uncertainty in the estimated mean performance. In this paper, we outline the limitations of conventional uncertainty and variation estimates, particularly their misalignment with purpose and the risk of underreporting. We then propose"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06746","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06746/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.06746","created_at":"2026-06-08T01:04:25.830070+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.06746v1","created_at":"2026-06-08T01:04:25.830070+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06746","created_at":"2026-06-08T01:04:25.830070+00:00"},{"alias_kind":"pith_short_12","alias_value":"XFQWT2RUKVYS","created_at":"2026-06-08T01:04:25.830070+00:00"},{"alias_kind":"pith_short_16","alias_value":"XFQWT2RUKVYSPY7Q","created_at":"2026-06-08T01:04:25.830070+00:00"},{"alias_kind":"pith_short_8","alias_value":"XFQWT2RU","created_at":"2026-06-08T01:04:25.830070+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.18594","citing_title":"Benchmarking Action Spaces in Reinforcement Learning for Vision-based Robotic Manipulation","ref_index":27,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX","json":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX.json","graph_json":"https://pith.science/api/pith-number/XFQWT2RUKVYSPY7QE7FGBMC6GX/graph.json","events_json":"https://pith.science/api/pith-number/XFQWT2RUKVYSPY7QE7FGBMC6GX/events.json","paper":"https://pith.science/paper/XFQWT2RU"},"agent_actions":{"view_html":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX","download_json":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX.json","view_paper":"https://pith.science/paper/XFQWT2RU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.06746&json=true","fetch_graph":"https://pith.science/api/pith-number/XFQWT2RUKVYSPY7QE7FGBMC6GX/graph.json","fetch_events":"https://pith.science/api/pith-number/XFQWT2RUKVYSPY7QE7FGBMC6GX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX/action/storage_attestation","attest_author":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX/action/author_attestation","sign_citation":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX/action/citation_signature","submit_replication":"https://pith.science/pith/XFQWT2RUKVYSPY7QE7FGBMC6GX/action/replication_record"}},"created_at":"2026-06-08T01:04:25.830070+00:00","updated_at":"2026-06-08T01:04:25.830070+00:00"}