{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:5YOAEEUFKIMWKRHFYKFECVOX6O","short_pith_number":"pith:5YOAEEUF","canonical_record":{"source":{"id":"2510.15859","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T17:51:28Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"152f22bd3709264c0b0f860cbb2e698cbc4dba7552aa3d943509a9d6fc483da2","abstract_canon_sha256":"c1fd632c9b7c1c50ed0298fd40a63704f47b66625d5e8713c714ea53becc5392"},"schema_version":"1.0"},"canonical_sha256":"ee1c02128552196544e5c28a4155d7f395aade67ce16ed7e1a994ef7b5d4502c","source":{"kind":"arxiv","id":"2510.15859","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.15859","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2510.15859v4","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.15859","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"5YOAEEUFKIMW","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"5YOAEEUFKIMWKRHF","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"5YOAEEUF","created_at":"2026-05-28T02:04:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:5YOAEEUFKIMWKRHFYKFECVOX6O","target":"record","payload":{"canonical_record":{"source":{"id":"2510.15859","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T17:51:28Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"152f22bd3709264c0b0f860cbb2e698cbc4dba7552aa3d943509a9d6fc483da2","abstract_canon_sha256":"c1fd632c9b7c1c50ed0298fd40a63704f47b66625d5e8713c714ea53becc5392"},"schema_version":"1.0"},"canonical_sha256":"ee1c02128552196544e5c28a4155d7f395aade67ce16ed7e1a994ef7b5d4502c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T02:04:43.881140Z","signature_b64":"9CHTrwx1hAcwlaXRRtKZ0KaUyvuBYmWBLGziNoMKPtHCS7UHjdYRxV2iJU3vZrkCBeM0mbYEjVzYd5Oij9wDBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ee1c02128552196544e5c28a4155d7f395aade67ce16ed7e1a994ef7b5d4502c","last_reissued_at":"2026-05-28T02:04:43.880474Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T02:04:43.880474Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.15859","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T02:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"l/rxnMACsgC4b6w+hiJvJLEnRiN/Evm+GHxTlIn1M3lXNOGfzDyk3S2JVhL3OM3wAOUETl16Vz52FBrplt0yCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T14:42:59.735309Z"},"content_sha256":"f8ccaf121c26b0a0e88eb79e30d785580c5a744027717fc0bf1660931bc57b83","schema_version":"1.0","event_id":"sha256:f8ccaf121c26b0a0e88eb79e30d785580c5a744027717fc0bf1660931bc57b83"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:5YOAEEUFKIMWKRHFYKFECVOX6O","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"InfiMed-ORBIT: Aligning LLMs on Open-Ended Complex Tasks via Rubric-Based Incremental Training","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Congkai Xie, Hongxia Yang, Pengkai Wang, Pengwei Liu, Qi Zuo, Zhijie Sang","submitted_at":"2025-10-17T17:51:28Z","abstract_excerpt":"Reinforcement learning (RL) has driven recent breakthroughs in large language models (LLMs), especially for tasks where rewards can be computed automatically, such as code generation. However, it is less effective in open-ended medical dialogue, where feedback is ambiguous, context-dependent, and difficult to summarize into a single scalar signal-often requiring heavily supervised reward models and risking reward hacking. Thus, we introduce ORBIT, an open-ended rubric-based incremental training framework tailored for critical medical dialogues. ORBIT integrates medical dialogue construction wi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.15859","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.15859/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T02:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"III2Grr3XPp+qEpRyDPJvpprSQ7sJ+Og0ef/E/0clehg9X1OcbemtkFN0jdxEa9SRV335X8y5Gn+eVLn+/9jBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T14:42:59.735709Z"},"content_sha256":"4875a8cf3025be5980596e6ef64773fc2b6c03d48d72b77b9ad950dda9f4ee9c","schema_version":"1.0","event_id":"sha256:4875a8cf3025be5980596e6ef64773fc2b6c03d48d72b77b9ad950dda9f4ee9c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/bundle.json","state_url":"https://pith.science/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T14:42:59Z","links":{"resolver":"https://pith.science/pith/5YOAEEUFKIMWKRHFYKFECVOX6O","bundle":"https://pith.science/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/bundle.json","state":"https://pith.science/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5YOAEEUFKIMWKRHFYKFECVOX6O/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:5YOAEEUFKIMWKRHFYKFECVOX6O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c1fd632c9b7c1c50ed0298fd40a63704f47b66625d5e8713c714ea53becc5392","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T17:51:28Z","title_canon_sha256":"152f22bd3709264c0b0f860cbb2e698cbc4dba7552aa3d943509a9d6fc483da2"},"schema_version":"1.0","source":{"id":"2510.15859","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.15859","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2510.15859v4","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.15859","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"5YOAEEUFKIMW","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"5YOAEEUFKIMWKRHF","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"5YOAEEUF","created_at":"2026-05-28T02:04:43Z"}],"graph_snapshots":[{"event_id":"sha256:4875a8cf3025be5980596e6ef64773fc2b6c03d48d72b77b9ad950dda9f4ee9c","target":"graph","created_at":"2026-05-28T02:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.15859/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has driven recent breakthroughs in large language models (LLMs), especially for tasks where rewards can be computed automatically, such as code generation. However, it is less effective in open-ended medical dialogue, where feedback is ambiguous, context-dependent, and difficult to summarize into a single scalar signal-often requiring heavily supervised reward models and risking reward hacking. Thus, we introduce ORBIT, an open-ended rubric-based incremental training framework tailored for critical medical dialogues. ORBIT integrates medical dialogue construction wi","authors_text":"Congkai Xie, Hongxia Yang, Pengkai Wang, Pengwei Liu, Qi Zuo, Zhijie Sang","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T17:51:28Z","title":"InfiMed-ORBIT: Aligning LLMs on Open-Ended Complex Tasks via Rubric-Based Incremental Training"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.15859","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f8ccaf121c26b0a0e88eb79e30d785580c5a744027717fc0bf1660931bc57b83","target":"record","created_at":"2026-05-28T02:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c1fd632c9b7c1c50ed0298fd40a63704f47b66625d5e8713c714ea53becc5392","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-17T17:51:28Z","title_canon_sha256":"152f22bd3709264c0b0f860cbb2e698cbc4dba7552aa3d943509a9d6fc483da2"},"schema_version":"1.0","source":{"id":"2510.15859","kind":"arxiv","version":4}},"canonical_sha256":"ee1c02128552196544e5c28a4155d7f395aade67ce16ed7e1a994ef7b5d4502c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ee1c02128552196544e5c28a4155d7f395aade67ce16ed7e1a994ef7b5d4502c","first_computed_at":"2026-05-28T02:04:43.880474Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T02:04:43.880474Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"9CHTrwx1hAcwlaXRRtKZ0KaUyvuBYmWBLGziNoMKPtHCS7UHjdYRxV2iJU3vZrkCBeM0mbYEjVzYd5Oij9wDBg==","signature_status":"signed_v1","signed_at":"2026-05-28T02:04:43.881140Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.15859","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f8ccaf121c26b0a0e88eb79e30d785580c5a744027717fc0bf1660931bc57b83","sha256:4875a8cf3025be5980596e6ef64773fc2b6c03d48d72b77b9ad950dda9f4ee9c"],"state_sha256":"fca128ec1ddd3aaa9baadc0c02bb0cd15d1e52c2a29911992db33cdd9bbfad16"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9UglOMznuTMpHvaUE/FAdEvJlDZTyjo9iiJizfA7t/uPR0yRjaVIYbJ3b+gyUMemRD1hc5hloo5NqA/7NJIoDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T14:42:59.741739Z","bundle_sha256":"bfa4d04660cb7d87456f753b9de16760869cf183a7e887e12bc918d7d4e73d99"}}