{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:6RVE6AICSZZC4M6VRZ2TSA4RIL","short_pith_number":"pith:6RVE6AIC","canonical_record":{"source":{"id":"2602.01357","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T17:50:59Z","cross_cats_sorted":[],"title_canon_sha256":"db5a3d232a556923e5bc40826c1f02b84cc78b9d3bf0b19239b0ab4113c36b66","abstract_canon_sha256":"c8870fa0037465ce8d4aaee4825b5354e61a6a03ed08702dbf55fba4a0dc13ca"},"schema_version":"1.0"},"canonical_sha256":"f46a4f010296722e33d58e7539039142f2cb26613b55829a04c208548c493d35","source":{"kind":"arxiv","id":"2602.01357","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.01357","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.01357v2","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.01357","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"6RVE6AICSZZC","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_16","alias_value":"6RVE6AICSZZC4M6V","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_8","alias_value":"6RVE6AIC","created_at":"2026-06-09T02:07:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:6RVE6AICSZZC4M6VRZ2TSA4RIL","target":"record","payload":{"canonical_record":{"source":{"id":"2602.01357","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T17:50:59Z","cross_cats_sorted":[],"title_canon_sha256":"db5a3d232a556923e5bc40826c1f02b84cc78b9d3bf0b19239b0ab4113c36b66","abstract_canon_sha256":"c8870fa0037465ce8d4aaee4825b5354e61a6a03ed08702dbf55fba4a0dc13ca"},"schema_version":"1.0"},"canonical_sha256":"f46a4f010296722e33d58e7539039142f2cb26613b55829a04c208548c493d35","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:18.919277Z","signature_b64":"KeBrSlrDQt1hWsEcLoiIgRwQ4+jcgmf8O+pviiH506Qpif9EVwJRVIoubxOsRVvgnrAQqS4K2fTI93QOq0YsCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f46a4f010296722e33d58e7539039142f2cb26613b55829a04c208548c493d35","last_reissued_at":"2026-06-09T02:07:18.918381Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:18.918381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.01357","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Iu7IQsrc4BNPV+tTPe8NTlDh35U7qEII436/aFz6A0mGdmMlobu3mS0t0PGp0AtKOpF5Eg9GL33ZlvrwzUT4Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T00:46:17.286134Z"},"content_sha256":"6e8a72315d77ce54cbaccc00de774cbae5de86b8bf34dc47ad4a51cf2a6b84a6","schema_version":"1.0","event_id":"sha256:6e8a72315d77ce54cbaccc00de774cbae5de86b8bf34dc47ad4a51cf2a6b84a6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:6RVE6AICSZZC4M6VRZ2TSA4RIL","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Your Self-Play Algorithm is Secretly an Adversarial Imitator: Understanding LLM Self-Play through the Lens of Imitation Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Chetan Bansal, Shangzhe Li, Weitong Zhang, Xuchao Zhang","submitted_at":"2026-02-01T17:50:59Z","abstract_excerpt":"Self-play post-training methods has emerged as an effective approach for finetuning large language models and turn the weak language model into strong language model without preference data. However, the theoretical foundations for self-play finetuning remain underexplored. In this work, we tackle this by connecting self-play finetuning with adversarial imitation learning by formulating finetuning procedure as a min-max game between the model and a regularized implicit reward player parameterized by the model itself. This perspective unifies self-play imitation and general preference alignment"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.01357","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.01357/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Xya9k8FUvD+dbl5FyYW0yQN1Mgmaoue+7xj8JF+DVcSwK8KmElCS4KtEG/cndKi79cO4PVKXi6r/bE0MtcMMAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T00:46:17.286924Z"},"content_sha256":"2ce743ca0aa41e6bb63d54d10b38ebf2d598e51f1af6c369e0ee6db8c952784e","schema_version":"1.0","event_id":"sha256:2ce743ca0aa41e6bb63d54d10b38ebf2d598e51f1af6c369e0ee6db8c952784e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/bundle.json","state_url":"https://pith.science/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T00:46:17Z","links":{"resolver":"https://pith.science/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL","bundle":"https://pith.science/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/bundle.json","state":"https://pith.science/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6RVE6AICSZZC4M6VRZ2TSA4RIL/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:6RVE6AICSZZC4M6VRZ2TSA4RIL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c8870fa0037465ce8d4aaee4825b5354e61a6a03ed08702dbf55fba4a0dc13ca","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T17:50:59Z","title_canon_sha256":"db5a3d232a556923e5bc40826c1f02b84cc78b9d3bf0b19239b0ab4113c36b66"},"schema_version":"1.0","source":{"id":"2602.01357","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.01357","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.01357v2","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.01357","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"6RVE6AICSZZC","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_16","alias_value":"6RVE6AICSZZC4M6V","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_8","alias_value":"6RVE6AIC","created_at":"2026-06-09T02:07:18Z"}],"graph_snapshots":[{"event_id":"sha256:2ce743ca0aa41e6bb63d54d10b38ebf2d598e51f1af6c369e0ee6db8c952784e","target":"graph","created_at":"2026-06-09T02:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.01357/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Self-play post-training methods has emerged as an effective approach for finetuning large language models and turn the weak language model into strong language model without preference data. However, the theoretical foundations for self-play finetuning remain underexplored. In this work, we tackle this by connecting self-play finetuning with adversarial imitation learning by formulating finetuning procedure as a min-max game between the model and a regularized implicit reward player parameterized by the model itself. This perspective unifies self-play imitation and general preference alignment","authors_text":"Chetan Bansal, Shangzhe Li, Weitong Zhang, Xuchao Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T17:50:59Z","title":"Your Self-Play Algorithm is Secretly an Adversarial Imitator: Understanding LLM Self-Play through the Lens of Imitation Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.01357","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6e8a72315d77ce54cbaccc00de774cbae5de86b8bf34dc47ad4a51cf2a6b84a6","target":"record","created_at":"2026-06-09T02:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c8870fa0037465ce8d4aaee4825b5354e61a6a03ed08702dbf55fba4a0dc13ca","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-01T17:50:59Z","title_canon_sha256":"db5a3d232a556923e5bc40826c1f02b84cc78b9d3bf0b19239b0ab4113c36b66"},"schema_version":"1.0","source":{"id":"2602.01357","kind":"arxiv","version":2}},"canonical_sha256":"f46a4f010296722e33d58e7539039142f2cb26613b55829a04c208548c493d35","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f46a4f010296722e33d58e7539039142f2cb26613b55829a04c208548c493d35","first_computed_at":"2026-06-09T02:07:18.918381Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:18.918381Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KeBrSlrDQt1hWsEcLoiIgRwQ4+jcgmf8O+pviiH506Qpif9EVwJRVIoubxOsRVvgnrAQqS4K2fTI93QOq0YsCw==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:18.919277Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.01357","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6e8a72315d77ce54cbaccc00de774cbae5de86b8bf34dc47ad4a51cf2a6b84a6","sha256:2ce743ca0aa41e6bb63d54d10b38ebf2d598e51f1af6c369e0ee6db8c952784e"],"state_sha256":"58ea0ef2829efea050947a6e0b1ac8b8caaacef704f7f8c145ccd18c20e9b3cd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CzONBi0ModNCI2pbkv+7GMwU6pAqBR6VpSeciplKQrl8yP5PgxRtQyMxb7By3sI/CP64Dh2xnwIFbv/YCkgJBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T00:46:17.290704Z","bundle_sha256":"f5b40822564395a6c869941ec4c6b3bd4a5f3873de777c466981566cdab8d22d"}}