{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:IJEABUGPDAF5ZBJUWL4PKTYT5U","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8200e8e346f60fe9e1971559c1d2c4cec3f455b135c4634fd40de3cd90419718","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T14:19:47Z","title_canon_sha256":"c439e1aa16f742e15563406654cfe467a9e865205e0becf1b8b75265bd1cd223"},"schema_version":"1.0","source":{"id":"2604.02091","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.02091","created_at":"2026-07-03T01:17:55Z"},{"alias_kind":"arxiv_version","alias_value":"2604.02091v2","created_at":"2026-07-03T01:17:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02091","created_at":"2026-07-03T01:17:55Z"},{"alias_kind":"pith_short_12","alias_value":"IJEABUGPDAF5","created_at":"2026-07-03T01:17:55Z"},{"alias_kind":"pith_short_16","alias_value":"IJEABUGPDAF5ZBJU","created_at":"2026-07-03T01:17:55Z"},{"alias_kind":"pith_short_8","alias_value":"IJEABUGP","created_at":"2026-07-03T01:17:55Z"}],"graph_snapshots":[{"event_id":"sha256:c60f3f622a96cf02b258bc86292d800cee33d267bf75e8ca22c3e7bf984c0bb6","target":"graph","created_at":"2026-07-03T01:17:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.02091/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Rerankers play a pivotal role in refining retrieval results for Retrieval-Augmented Generation. However, current reranking models are typically optimized on static human annotated relevance labels in isolation, decoupled from the downstream generation process. This isolation leads to a fundamental misalignment: documents identified as topically relevant by information retrieval metrics often fail to provide the actual utility required by the LLM for precise answer generation. To bridge this gap, we introduce ReRanking Preference Optimization (RRPO), a reinforcement learning framework that dire","authors_text":"Cangqi Zhou, Fanfan Wang, Rui Xia, Xiangqing Shen, Xinyu Dai, Yuhang Wu, Zhen Wu","cross_cats":["cs.AI","cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T14:19:47Z","title":"Optimizing RAG Rerankers with LLM Feedback via Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.02091","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:edf49ea1c0b303d20a799bce78af5e07f85ef86787bcf4163c07ceb7d2a2202a","target":"record","created_at":"2026-07-03T01:17:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8200e8e346f60fe9e1971559c1d2c4cec3f455b135c4634fd40de3cd90419718","cross_cats_sorted":["cs.AI","cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T14:19:47Z","title_canon_sha256":"c439e1aa16f742e15563406654cfe467a9e865205e0becf1b8b75265bd1cd223"},"schema_version":"1.0","source":{"id":"2604.02091","kind":"arxiv","version":2}},"canonical_sha256":"424800d0cf180bdc8534b2f8f54f13ed3679478adfc08d7c46946a83d52fda1c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"424800d0cf180bdc8534b2f8f54f13ed3679478adfc08d7c46946a83d52fda1c","first_computed_at":"2026-07-03T01:17:55.109551Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T01:17:55.109551Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ldgvtbS0FiP3q0t9MxfA4jeeKL8ggr9tHOZYoLfLDBZnvjlvorGw8ZloTbfPyiwvU3WnGWzZ0vNCW7hqo7stCg==","signature_status":"signed_v1","signed_at":"2026-07-03T01:17:55.110096Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.02091","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:edf49ea1c0b303d20a799bce78af5e07f85ef86787bcf4163c07ceb7d2a2202a","sha256:c60f3f622a96cf02b258bc86292d800cee33d267bf75e8ca22c3e7bf984c0bb6"],"state_sha256":"d5f34c2a7d535a6e620132d495e38d4b79c7153b9375aa217817db3e7e0b587f"}