{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:SNFFGAUG5VHQNIMTPVUMB73G54","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"035fb1f5c35fce336ae12c5e37cc331cb4822a36bb41b0f90dbce765fee97c3e","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:48:36Z","title_canon_sha256":"49d68767a893b79144f7e0e4f63473c98d96f85600129898db1d675af3834b9b"},"schema_version":"1.0","source":{"id":"2605.20256","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20256","created_at":"2026-05-21T00:04:22Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20256v1","created_at":"2026-05-21T00:04:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20256","created_at":"2026-05-21T00:04:22Z"},{"alias_kind":"pith_short_12","alias_value":"SNFFGAUG5VHQ","created_at":"2026-05-21T00:04:22Z"},{"alias_kind":"pith_short_16","alias_value":"SNFFGAUG5VHQNIMT","created_at":"2026-05-21T00:04:22Z"},{"alias_kind":"pith_short_8","alias_value":"SNFFGAUG","created_at":"2026-05-21T00:04:22Z"}],"graph_snapshots":[{"event_id":"sha256:45f465907eb6093c261beb889f21f4f5e06988b5a78de14f396cd02a57a8d413","target":"graph","created_at":"2026-05-21T00:04:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.20256/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning has become a cornerstone for aligning and unlocking the reasoning capabilities of large-scale models. At its core, the training loop of GRPO and its variants alternates between rollout sampling and policy update. Unlike supervised learning, where each gradient step is anchored to an explicit ground-truth target, the optimal gradient direction for updating model parameters in this setting is not known a priori; the high-quality rollouts drawn during the sampling stage therefore act as the implicit \"teacher\" that guides every parameter update. However, GRPO adopt a simple ","authors_text":"Likang Xiao, Liu Liu, Peng Jiang, Quan Chen, Wenjun Wu, Xikai Zhang, Yanhua Cheng, Yingze Zhang, Yongzhi Li","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:48:36Z","title":"FBOS-RL: Feedback-Driven Bi-Objective Synergistic Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20256","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9867f7f54c9a203c59c4601cd83c55197594bab14516852bdf4d7374f1ca4695","target":"record","created_at":"2026-05-21T00:04:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"035fb1f5c35fce336ae12c5e37cc331cb4822a36bb41b0f90dbce765fee97c3e","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-18T12:48:36Z","title_canon_sha256":"49d68767a893b79144f7e0e4f63473c98d96f85600129898db1d675af3834b9b"},"schema_version":"1.0","source":{"id":"2605.20256","kind":"arxiv","version":1}},"canonical_sha256":"934a530286ed4f06a1937d68c0ff66ef19982798bb64d29d8b351de5d7e1ef74","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"934a530286ed4f06a1937d68c0ff66ef19982798bb64d29d8b351de5d7e1ef74","first_computed_at":"2026-05-21T00:04:22.923542Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T00:04:22.923542Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RP7PZ7sI4ztrhUkCF9Q35f8EFGcwM/eQh3sgBxUi+D2E8ccTovlVH+nXI/hA1QMgNa02+MXbUn4tPwJz3L3DBA==","signature_status":"signed_v1","signed_at":"2026-05-21T00:04:22.924027Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.20256","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9867f7f54c9a203c59c4601cd83c55197594bab14516852bdf4d7374f1ca4695","sha256:45f465907eb6093c261beb889f21f4f5e06988b5a78de14f396cd02a57a8d413"],"state_sha256":"0eea301e515fb2b1b6f3cd3e655b98f42122834f856bc82da3df9ab061b3639b"}