{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:KP27Q3ABXMLX3LNJ3TWELYJXRX","short_pith_number":"pith:KP27Q3AB","canonical_record":{"source":{"id":"2510.18821","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-21T17:19:35Z","cross_cats_sorted":[],"title_canon_sha256":"9282ea2f430c6b96a214184d2ef18fbc26763433544fed61bba5c3bbb00256f7","abstract_canon_sha256":"552c7109c6c1f1126f04898b536d0ec1de1b2a27a2a880ae7755fd536004197d"},"schema_version":"1.0"},"canonical_sha256":"53f5f86c01bb177dada9dcec45e1378de362b405ad91ff72dcacc95fad9bf5b5","source":{"kind":"arxiv","id":"2510.18821","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.18821","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"arxiv_version","alias_value":"2510.18821v3","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.18821","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_12","alias_value":"KP27Q3ABXMLX","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_16","alias_value":"KP27Q3ABXMLX3LNJ","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_8","alias_value":"KP27Q3AB","created_at":"2026-05-20T01:05:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:KP27Q3ABXMLX3LNJ3TWELYJXRX","target":"record","payload":{"canonical_record":{"source":{"id":"2510.18821","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-21T17:19:35Z","cross_cats_sorted":[],"title_canon_sha256":"9282ea2f430c6b96a214184d2ef18fbc26763433544fed61bba5c3bbb00256f7","abstract_canon_sha256":"552c7109c6c1f1126f04898b536d0ec1de1b2a27a2a880ae7755fd536004197d"},"schema_version":"1.0"},"canonical_sha256":"53f5f86c01bb177dada9dcec45e1378de362b405ad91ff72dcacc95fad9bf5b5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:00.498346Z","signature_b64":"bobeYIHybjnDFuCbWMIJOSLmmwtVHPDxZJ1/EOrLscyD1I8LYlHYKhUg28LArnk8WAbJyq0HKTeTZyShkGnCAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"53f5f86c01bb177dada9dcec45e1378de362b405ad91ff72dcacc95fad9bf5b5","last_reissued_at":"2026-05-20T01:05:00.497560Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:00.497560Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.18821","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1ck12kDrI+pBNnahSZGoCs3bs98nO7KnfVPOdH3AWtdOvvcB+G1YrtimAPnH1qJ+17OnzILelQ0XLrA129B6CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:51:11.271657Z"},"content_sha256":"36db5f651f255f0375abf0e36df844e72191728fb2db13f81b5fdc1f2d9c76b0","schema_version":"1.0","event_id":"sha256:36db5f651f255f0375abf0e36df844e72191728fb2db13f81b5fdc1f2d9c76b0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:KP27Q3ABXMLX3LNJ3TWELYJXRX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Search Self-play: Pushing the Frontier of Agent Capability without Supervision","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chutian Wang, Guanjun Jiang, Haonan Chen, Haotian Xu, Hongliang Lu, Jiaqi Guo, Pengyu Cheng, Ruijin Ding, Xiaoxi Jiang, Yuhang Wen","submitted_at":"2025-10-21T17:19:35Z","abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has become the mainstream technique for training LLM agents. However, RLVR highly depends on well-crafted task queries and corresponding ground-truth answers to provide accurate rewards, which requires significant human effort and hinders the scaling of RL processes, especially in agentic scenarios. Although a few recent works explore task synthesis methods, the difficulty of generated agentic tasks can hardly be controlled to provide effective RL training advantages. To achieve agentic RLVR with higher scalability, we explore self-play tra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.18821","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.18821/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IWh/asqWasxxPrpYRpCLRM5BhdW+ZCvwmg/STq5AqhuxVYq1k/zBtPKaNVJbbp2FfzPRQB85nZfhpAICiRpfCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-21T12:51:11.272299Z"},"content_sha256":"bf7d7cbf92de9a1609777c848954ab22a0f6470c73440f7a47fb60c1cf6e3398","schema_version":"1.0","event_id":"sha256:bf7d7cbf92de9a1609777c848954ab22a0f6470c73440f7a47fb60c1cf6e3398"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/bundle.json","state_url":"https://pith.science/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-21T12:51:11Z","links":{"resolver":"https://pith.science/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX","bundle":"https://pith.science/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/bundle.json","state":"https://pith.science/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KP27Q3ABXMLX3LNJ3TWELYJXRX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:KP27Q3ABXMLX3LNJ3TWELYJXRX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"552c7109c6c1f1126f04898b536d0ec1de1b2a27a2a880ae7755fd536004197d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-21T17:19:35Z","title_canon_sha256":"9282ea2f430c6b96a214184d2ef18fbc26763433544fed61bba5c3bbb00256f7"},"schema_version":"1.0","source":{"id":"2510.18821","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.18821","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"arxiv_version","alias_value":"2510.18821v3","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.18821","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_12","alias_value":"KP27Q3ABXMLX","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_16","alias_value":"KP27Q3ABXMLX3LNJ","created_at":"2026-05-20T01:05:00Z"},{"alias_kind":"pith_short_8","alias_value":"KP27Q3AB","created_at":"2026-05-20T01:05:00Z"}],"graph_snapshots":[{"event_id":"sha256:bf7d7cbf92de9a1609777c848954ab22a0f6470c73440f7a47fb60c1cf6e3398","target":"graph","created_at":"2026-05-20T01:05:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.18821/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning with verifiable rewards (RLVR) has become the mainstream technique for training LLM agents. However, RLVR highly depends on well-crafted task queries and corresponding ground-truth answers to provide accurate rewards, which requires significant human effort and hinders the scaling of RL processes, especially in agentic scenarios. Although a few recent works explore task synthesis methods, the difficulty of generated agentic tasks can hardly be controlled to provide effective RL training advantages. To achieve agentic RLVR with higher scalability, we explore self-play tra","authors_text":"Chutian Wang, Guanjun Jiang, Haonan Chen, Haotian Xu, Hongliang Lu, Jiaqi Guo, Pengyu Cheng, Ruijin Ding, Xiaoxi Jiang, Yuhang Wen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-21T17:19:35Z","title":"Search Self-play: Pushing the Frontier of Agent Capability without Supervision"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.18821","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:36db5f651f255f0375abf0e36df844e72191728fb2db13f81b5fdc1f2d9c76b0","target":"record","created_at":"2026-05-20T01:05:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"552c7109c6c1f1126f04898b536d0ec1de1b2a27a2a880ae7755fd536004197d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-21T17:19:35Z","title_canon_sha256":"9282ea2f430c6b96a214184d2ef18fbc26763433544fed61bba5c3bbb00256f7"},"schema_version":"1.0","source":{"id":"2510.18821","kind":"arxiv","version":3}},"canonical_sha256":"53f5f86c01bb177dada9dcec45e1378de362b405ad91ff72dcacc95fad9bf5b5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"53f5f86c01bb177dada9dcec45e1378de362b405ad91ff72dcacc95fad9bf5b5","first_computed_at":"2026-05-20T01:05:00.497560Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:00.497560Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bobeYIHybjnDFuCbWMIJOSLmmwtVHPDxZJ1/EOrLscyD1I8LYlHYKhUg28LArnk8WAbJyq0HKTeTZyShkGnCAg==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:00.498346Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.18821","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:36db5f651f255f0375abf0e36df844e72191728fb2db13f81b5fdc1f2d9c76b0","sha256:bf7d7cbf92de9a1609777c848954ab22a0f6470c73440f7a47fb60c1cf6e3398"],"state_sha256":"c2a81c16b78fd9e11573dcdba6bb39d81488c669880df978a6c07923aaec2601"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WFQEgSuIkN1bA8o6Qet/YeHrJSDsyVTpL92E7+Gsng0M2n7AqJwrDhax4ytahL5bgCMtQvw/6SuM+8vMo3r5Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-21T12:51:11.275444Z","bundle_sha256":"355ca3dd8a2642a0d027a9506ec0a47f64bc388050d1fd4838ba33930de45f7f"}}