{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:DKEQ5E4J3VHEF66LOB6MBD2RDC","short_pith_number":"pith:DKEQ5E4J","canonical_record":{"source":{"id":"2509.22963","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-26T21:53:36Z","cross_cats_sorted":[],"title_canon_sha256":"182107543862f9fc3320a19b139b7abbb709b5070b30d3316ecac0d6d3b42f4a","abstract_canon_sha256":"923ac1b6715e18305aa5f922e97ddb47e6766b9b2d98f3fcfed5093665b71b4b"},"schema_version":"1.0"},"canonical_sha256":"1a890e9389dd4e42fbcb707cc08f51188f1ed96423a543412a02257024b68b2e","source":{"kind":"arxiv","id":"2509.22963","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.22963","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"arxiv_version","alias_value":"2509.22963v3","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.22963","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_12","alias_value":"DKEQ5E4J3VHE","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_16","alias_value":"DKEQ5E4J3VHEF66L","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_8","alias_value":"DKEQ5E4J","created_at":"2026-05-21T01:04:17Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:DKEQ5E4J3VHEF66LOB6MBD2RDC","target":"record","payload":{"canonical_record":{"source":{"id":"2509.22963","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-26T21:53:36Z","cross_cats_sorted":[],"title_canon_sha256":"182107543862f9fc3320a19b139b7abbb709b5070b30d3316ecac0d6d3b42f4a","abstract_canon_sha256":"923ac1b6715e18305aa5f922e97ddb47e6766b9b2d98f3fcfed5093665b71b4b"},"schema_version":"1.0"},"canonical_sha256":"1a890e9389dd4e42fbcb707cc08f51188f1ed96423a543412a02257024b68b2e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:17.134523Z","signature_b64":"8nr+zqzaqyQKgrdWJVIw2TZZ2wRmFDHwrV2wXP8yqdXmjmBkf46bci6Recnf8vZu+te3QwqVb6M+/HRItvY5Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1a890e9389dd4e42fbcb707cc08f51188f1ed96423a543412a02257024b68b2e","last_reissued_at":"2026-05-21T01:04:17.133780Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:17.133780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.22963","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MJiLd2FJdISaowb5MNJ7LRq0960UcDccHr9jQdsnxixdcALsfHipfu8Y6kn6DEXJ/o3WSiSbSB5lCIGquBlDAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T21:09:20.941803Z"},"content_sha256":"f3d885230d3608a082f1a6cdb11b8b4ad60a772ab2f380f98f9b3950ae996698","schema_version":"1.0","event_id":"sha256:f3d885230d3608a082f1a6cdb11b8b4ad60a772ab2f380f98f9b3950ae996698"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:DKEQ5E4J3VHEF66LOB6MBD2RDC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning with Discrete Diffusion Policies for Combinatorial Action Spaces","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aviv Rosenberg, Bo Dai, Craig Boutilier, Guy Tenneholtz, Haitong Ma, Lior Shani, Na Li, Ofir Nabati, Oran Lang, Shie Mannor","submitted_at":"2025-09-26T21:53:36Z","abstract_excerpt":"Reinforcement learning (RL) struggles to scale to large, combinatorial action spaces common in many real-world problems. This paper introduces a novel framework for training discrete diffusion models as highly effective policies in these complex settings. Our key innovation is an efficient online training process that ensures stable and effective policy improvement. By leveraging policy mirror descent (PMD) to define an ideal, regularized target policy distribution, we frame the policy update as a distributional matching problem, training the expressive diffusion model to replicate this stable"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.22963","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.22963/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7n2qRrlPWwMxjU+AD+4UJUMV0Vp/fHYhzhVG0kHPt1xwHAyi3OVSdyO4ugnOu83aY/TSVjjdnwaeK1HGreIoAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T21:09:20.942296Z"},"content_sha256":"7dde1ede57aa0d6f6a37cb69ddf8d8c31f99affdea9c05da80e1c3f5aed9fb92","schema_version":"1.0","event_id":"sha256:7dde1ede57aa0d6f6a37cb69ddf8d8c31f99affdea9c05da80e1c3f5aed9fb92"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/bundle.json","state_url":"https://pith.science/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T21:09:20Z","links":{"resolver":"https://pith.science/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC","bundle":"https://pith.science/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/bundle.json","state":"https://pith.science/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DKEQ5E4J3VHEF66LOB6MBD2RDC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:DKEQ5E4J3VHEF66LOB6MBD2RDC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"923ac1b6715e18305aa5f922e97ddb47e6766b9b2d98f3fcfed5093665b71b4b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-26T21:53:36Z","title_canon_sha256":"182107543862f9fc3320a19b139b7abbb709b5070b30d3316ecac0d6d3b42f4a"},"schema_version":"1.0","source":{"id":"2509.22963","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.22963","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"arxiv_version","alias_value":"2509.22963v3","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.22963","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_12","alias_value":"DKEQ5E4J3VHE","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_16","alias_value":"DKEQ5E4J3VHEF66L","created_at":"2026-05-21T01:04:17Z"},{"alias_kind":"pith_short_8","alias_value":"DKEQ5E4J","created_at":"2026-05-21T01:04:17Z"}],"graph_snapshots":[{"event_id":"sha256:7dde1ede57aa0d6f6a37cb69ddf8d8c31f99affdea9c05da80e1c3f5aed9fb92","target":"graph","created_at":"2026-05-21T01:04:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.22963/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) struggles to scale to large, combinatorial action spaces common in many real-world problems. This paper introduces a novel framework for training discrete diffusion models as highly effective policies in these complex settings. Our key innovation is an efficient online training process that ensures stable and effective policy improvement. By leveraging policy mirror descent (PMD) to define an ideal, regularized target policy distribution, we frame the policy update as a distributional matching problem, training the expressive diffusion model to replicate this stable","authors_text":"Aviv Rosenberg, Bo Dai, Craig Boutilier, Guy Tenneholtz, Haitong Ma, Lior Shani, Na Li, Ofir Nabati, Oran Lang, Shie Mannor","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-26T21:53:36Z","title":"Reinforcement Learning with Discrete Diffusion Policies for Combinatorial Action Spaces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.22963","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f3d885230d3608a082f1a6cdb11b8b4ad60a772ab2f380f98f9b3950ae996698","target":"record","created_at":"2026-05-21T01:04:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"923ac1b6715e18305aa5f922e97ddb47e6766b9b2d98f3fcfed5093665b71b4b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-26T21:53:36Z","title_canon_sha256":"182107543862f9fc3320a19b139b7abbb709b5070b30d3316ecac0d6d3b42f4a"},"schema_version":"1.0","source":{"id":"2509.22963","kind":"arxiv","version":3}},"canonical_sha256":"1a890e9389dd4e42fbcb707cc08f51188f1ed96423a543412a02257024b68b2e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1a890e9389dd4e42fbcb707cc08f51188f1ed96423a543412a02257024b68b2e","first_computed_at":"2026-05-21T01:04:17.133780Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:17.133780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8nr+zqzaqyQKgrdWJVIw2TZZ2wRmFDHwrV2wXP8yqdXmjmBkf46bci6Recnf8vZu+te3QwqVb6M+/HRItvY5Cw==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:17.134523Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.22963","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f3d885230d3608a082f1a6cdb11b8b4ad60a772ab2f380f98f9b3950ae996698","sha256:7dde1ede57aa0d6f6a37cb69ddf8d8c31f99affdea9c05da80e1c3f5aed9fb92"],"state_sha256":"def0e22404c5240507cc433c55524abf175252ed6fde6760b87f540ba42e3944"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hK4VJj/YvT+zEtlFYQUAh+fkYkCcyW37yciPE/wkmjwOlveLehcPPLZ6KWq8R7JEFaGtE93U2NZUHu6U6N92AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T21:09:20.945561Z","bundle_sha256":"e274198a76085dfcc782db05eecd4793c2091965ccc3ca84617124181a604055"}}