{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:UJFSIEYGHRNCXVA7ZYMPITTQU7","short_pith_number":"pith:UJFSIEYG","canonical_record":{"source":{"id":"2606.08735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T17:08:28Z","cross_cats_sorted":[],"title_canon_sha256":"0a8a907edc56e70d051048f2f853e19b41b70295adaa219ef62dc6cbb6ec7762","abstract_canon_sha256":"c8f2bda375e30826797ecf4cf9bba9687305cb2abba15675ed2e93f140875543"},"schema_version":"1.0"},"canonical_sha256":"a24b2413063c5a2bd41fce18f44e70a7eb9be8a71ba15759c42392d7e0956aa3","source":{"kind":"arxiv","id":"2606.08735","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08735","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08735v1","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08735","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_12","alias_value":"UJFSIEYGHRNC","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_16","alias_value":"UJFSIEYGHRNCXVA7","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_8","alias_value":"UJFSIEYG","created_at":"2026-06-09T02:07:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:UJFSIEYGHRNCXVA7ZYMPITTQU7","target":"record","payload":{"canonical_record":{"source":{"id":"2606.08735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T17:08:28Z","cross_cats_sorted":[],"title_canon_sha256":"0a8a907edc56e70d051048f2f853e19b41b70295adaa219ef62dc6cbb6ec7762","abstract_canon_sha256":"c8f2bda375e30826797ecf4cf9bba9687305cb2abba15675ed2e93f140875543"},"schema_version":"1.0"},"canonical_sha256":"a24b2413063c5a2bd41fce18f44e70a7eb9be8a71ba15759c42392d7e0956aa3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:36.220153Z","signature_b64":"ggebiO30+Ox1kVU59l9ZwwUpTkaky6RP00IBXzJzRIsn8NFI3pIYwH+rI640bVMid4E5is8iS59QUkPTnFF/Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a24b2413063c5a2bd41fce18f44e70a7eb9be8a71ba15759c42392d7e0956aa3","last_reissued_at":"2026-06-09T02:07:36.218985Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:36.218985Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.08735","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5Ij9z/DfL4UBQIapto28G1DXqO0AQUhCc0R7xNs7qNqilN7bfVv/ZFprm5sLFbahgO2Ls+KnF3l7d5XH3LL1CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T18:35:16.497857Z"},"content_sha256":"04532e4d00fb11a94d13fb3e0045aa290aa70e9e1db4233cd2b1c6c2923c7501","schema_version":"1.0","event_id":"sha256:04532e4d00fb11a94d13fb3e0045aa290aa70e9e1db4233cd2b1c6c2923c7501"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:UJFSIEYGHRNCXVA7ZYMPITTQU7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Structure-Conditioned Actor-Critic Branches for Quality-Diversity Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Lianrong Zuo, Peilan Xu, Wenjian Luo, Yong Liu","submitted_at":"2026-06-07T17:08:28Z","abstract_excerpt":"Quality-diversity reinforcement learning (QD-RL) aims to construct policy repertoires that contain both high-performing and behaviorally diverse policies. Existing QD-RL methods mainly diversify policy instances after rollout evaluation or use learned value information to improve policy quality and behavior targeting, while the learning branches that generate candidate policies remain less explored. This paper proposes SV-QD-RL, a structure-value coupled framework that represents each candidate as a structure-conditioned actor-critic branch. Each branch contains an actor, a structural mask, a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08735","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.08735/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zzHGl097gIWV+YoFHB8QV+Zhkhu1Z+noHGoTcuFLm/wRI5xmcgBviQTjfxQPfovQPtCXEPuVHlxMXhzCIxM8Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T18:35:16.498646Z"},"content_sha256":"6ec4a4d697c9ff6421fc426e96794c54ff870f9ab9beb3104570e13f278a307b","schema_version":"1.0","event_id":"sha256:6ec4a4d697c9ff6421fc426e96794c54ff870f9ab9beb3104570e13f278a307b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/bundle.json","state_url":"https://pith.science/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T18:35:16Z","links":{"resolver":"https://pith.science/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7","bundle":"https://pith.science/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/bundle.json","state":"https://pith.science/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UJFSIEYGHRNCXVA7ZYMPITTQU7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:UJFSIEYGHRNCXVA7ZYMPITTQU7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c8f2bda375e30826797ecf4cf9bba9687305cb2abba15675ed2e93f140875543","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T17:08:28Z","title_canon_sha256":"0a8a907edc56e70d051048f2f853e19b41b70295adaa219ef62dc6cbb6ec7762"},"schema_version":"1.0","source":{"id":"2606.08735","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.08735","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"arxiv_version","alias_value":"2606.08735v1","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.08735","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_12","alias_value":"UJFSIEYGHRNC","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_16","alias_value":"UJFSIEYGHRNCXVA7","created_at":"2026-06-09T02:07:36Z"},{"alias_kind":"pith_short_8","alias_value":"UJFSIEYG","created_at":"2026-06-09T02:07:36Z"}],"graph_snapshots":[{"event_id":"sha256:6ec4a4d697c9ff6421fc426e96794c54ff870f9ab9beb3104570e13f278a307b","target":"graph","created_at":"2026-06-09T02:07:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.08735/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Quality-diversity reinforcement learning (QD-RL) aims to construct policy repertoires that contain both high-performing and behaviorally diverse policies. Existing QD-RL methods mainly diversify policy instances after rollout evaluation or use learned value information to improve policy quality and behavior targeting, while the learning branches that generate candidate policies remain less explored. This paper proposes SV-QD-RL, a structure-value coupled framework that represents each candidate as a structure-conditioned actor-critic branch. Each branch contains an actor, a structural mask, a ","authors_text":"Lianrong Zuo, Peilan Xu, Wenjian Luo, Yong Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T17:08:28Z","title":"Structure-Conditioned Actor-Critic Branches for Quality-Diversity Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.08735","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:04532e4d00fb11a94d13fb3e0045aa290aa70e9e1db4233cd2b1c6c2923c7501","target":"record","created_at":"2026-06-09T02:07:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c8f2bda375e30826797ecf4cf9bba9687305cb2abba15675ed2e93f140875543","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-07T17:08:28Z","title_canon_sha256":"0a8a907edc56e70d051048f2f853e19b41b70295adaa219ef62dc6cbb6ec7762"},"schema_version":"1.0","source":{"id":"2606.08735","kind":"arxiv","version":1}},"canonical_sha256":"a24b2413063c5a2bd41fce18f44e70a7eb9be8a71ba15759c42392d7e0956aa3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a24b2413063c5a2bd41fce18f44e70a7eb9be8a71ba15759c42392d7e0956aa3","first_computed_at":"2026-06-09T02:07:36.218985Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:36.218985Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ggebiO30+Ox1kVU59l9ZwwUpTkaky6RP00IBXzJzRIsn8NFI3pIYwH+rI640bVMid4E5is8iS59QUkPTnFF/Cw==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:36.220153Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.08735","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:04532e4d00fb11a94d13fb3e0045aa290aa70e9e1db4233cd2b1c6c2923c7501","sha256:6ec4a4d697c9ff6421fc426e96794c54ff870f9ab9beb3104570e13f278a307b"],"state_sha256":"a170cabce5c18612cb7308d81f8dc298573b2e2bcb1666ca9a1fb53a0b74de0d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"h5nSghg8FucrbsVE64NAVmpUoAziwgP9wcTJtu3Crudl5BZ8TcSeYR+1Bf7emUKtOhKg9Y4xfsPSak1AoNzLDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T18:35:16.503059Z","bundle_sha256":"962ea680eb2e0cba03479e2cbc72cbecc704bbb2091504cff98e0ff506ba87fb"}}