{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:P2Z5KBPGT63TBNYCRBMQQY7QSG","short_pith_number":"pith:P2Z5KBPG","canonical_record":{"source":{"id":"2510.24636","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-28T17:02:46Z","cross_cats_sorted":[],"title_canon_sha256":"ca001475ac0a1184a685a414abfe5bdf6e9033620ec6c324e2537552a7c2c4d4","abstract_canon_sha256":"b4f14c2ad4e3758f4e22c21582817a7778695968cc684e0ad6d23f509f74ce98"},"schema_version":"1.0"},"canonical_sha256":"7eb3d505e69fb730b70288590863f091a4c8a5964b2fee7f3e503ed5ef0b671b","source":{"kind":"arxiv","id":"2510.24636","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.24636","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"arxiv_version","alias_value":"2510.24636v3","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.24636","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_12","alias_value":"P2Z5KBPGT63T","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_16","alias_value":"P2Z5KBPGT63TBNYC","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_8","alias_value":"P2Z5KBPG","created_at":"2026-07-02T01:18:05Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:P2Z5KBPGT63TBNYCRBMQQY7QSG","target":"record","payload":{"canonical_record":{"source":{"id":"2510.24636","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-28T17:02:46Z","cross_cats_sorted":[],"title_canon_sha256":"ca001475ac0a1184a685a414abfe5bdf6e9033620ec6c324e2537552a7c2c4d4","abstract_canon_sha256":"b4f14c2ad4e3758f4e22c21582817a7778695968cc684e0ad6d23f509f74ce98"},"schema_version":"1.0"},"canonical_sha256":"7eb3d505e69fb730b70288590863f091a4c8a5964b2fee7f3e503ed5ef0b671b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:18:05.096912Z","signature_b64":"eZa1N70uvUYxsDJ2YIWZzpovZkcy+SjVHrl23WNx0Miy2XKbVxDSHIrkr8+h31aRNnJg8VFEs9GJ5vNWsFrcAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7eb3d505e69fb730b70288590863f091a4c8a5964b2fee7f3e503ed5ef0b671b","last_reissued_at":"2026-07-02T01:18:05.096445Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:18:05.096445Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.24636","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:18:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FWbR+o1dVa39kor/nXioXU/Lj9PUoZ14sOzU8BA6zhDaykLD6YiWrKVAkGXluwJs/35nCga0nUWmRSdkqDp9Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T11:10:21.987336Z"},"content_sha256":"25f87be2778e127148490cfc4e5df3f4cc9ea9f79e97161b8f2c0effb23c29bc","schema_version":"1.0","event_id":"sha256:25f87be2778e127148490cfc4e5df3f4cc9ea9f79e97161b8f2c0effb23c29bc"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:P2Z5KBPGT63TBNYCRBMQQY7QSG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"OpenReward: Learning to Reward Long-form Agentic Tasks via Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Haitao Li, Minghang Zhu, Pengjie Ren, Suzan Verberne, Teng Sun, Zhaochun Ren, Zhengliang Shi, Ziyou Hu","submitted_at":"2025-10-28T17:02:46Z","abstract_excerpt":"Reward models (RMs) have become essential for aligning large language models (LLMs), serving as scalable proxies for human evaluation in both training and inference. However, existing RMs struggle on knowledge-intensive and long-form tasks, where evaluating correctness requires grounding beyond the model's internal knowledge. This limitation hinders them from reliably discriminating subtle quality differences, especially when external evidence is necessary. To address this, we introduce OpenRM, a tool-augmented long-form reward model that systematically judges open-ended responses by invoking "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.24636","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.24636/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:18:05Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iz8zO4ZLSL8kd0J0V2DNxL6qVTnHRoyy2vJ3Oi3zYEFnMXKh3m1PfYHIAC6S2IK4/Mg8mToIe5Y0NEGfbmOqBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T11:10:21.987720Z"},"content_sha256":"a61e678dab50d3683a4ebcdf83eedf028e78700e385f42c02f6a98ab5c4e2926","schema_version":"1.0","event_id":"sha256:a61e678dab50d3683a4ebcdf83eedf028e78700e385f42c02f6a98ab5c4e2926"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/bundle.json","state_url":"https://pith.science/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-03T11:10:21Z","links":{"resolver":"https://pith.science/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG","bundle":"https://pith.science/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/bundle.json","state":"https://pith.science/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/P2Z5KBPGT63TBNYCRBMQQY7QSG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:P2Z5KBPGT63TBNYCRBMQQY7QSG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b4f14c2ad4e3758f4e22c21582817a7778695968cc684e0ad6d23f509f74ce98","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-28T17:02:46Z","title_canon_sha256":"ca001475ac0a1184a685a414abfe5bdf6e9033620ec6c324e2537552a7c2c4d4"},"schema_version":"1.0","source":{"id":"2510.24636","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.24636","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"arxiv_version","alias_value":"2510.24636v3","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.24636","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_12","alias_value":"P2Z5KBPGT63T","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_16","alias_value":"P2Z5KBPGT63TBNYC","created_at":"2026-07-02T01:18:05Z"},{"alias_kind":"pith_short_8","alias_value":"P2Z5KBPG","created_at":"2026-07-02T01:18:05Z"}],"graph_snapshots":[{"event_id":"sha256:a61e678dab50d3683a4ebcdf83eedf028e78700e385f42c02f6a98ab5c4e2926","target":"graph","created_at":"2026-07-02T01:18:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.24636/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reward models (RMs) have become essential for aligning large language models (LLMs), serving as scalable proxies for human evaluation in both training and inference. However, existing RMs struggle on knowledge-intensive and long-form tasks, where evaluating correctness requires grounding beyond the model's internal knowledge. This limitation hinders them from reliably discriminating subtle quality differences, especially when external evidence is necessary. To address this, we introduce OpenRM, a tool-augmented long-form reward model that systematically judges open-ended responses by invoking ","authors_text":"Haitao Li, Minghang Zhu, Pengjie Ren, Suzan Verberne, Teng Sun, Zhaochun Ren, Zhengliang Shi, Ziyou Hu","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-28T17:02:46Z","title":"OpenReward: Learning to Reward Long-form Agentic Tasks via Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.24636","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:25f87be2778e127148490cfc4e5df3f4cc9ea9f79e97161b8f2c0effb23c29bc","target":"record","created_at":"2026-07-02T01:18:05Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b4f14c2ad4e3758f4e22c21582817a7778695968cc684e0ad6d23f509f74ce98","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-10-28T17:02:46Z","title_canon_sha256":"ca001475ac0a1184a685a414abfe5bdf6e9033620ec6c324e2537552a7c2c4d4"},"schema_version":"1.0","source":{"id":"2510.24636","kind":"arxiv","version":3}},"canonical_sha256":"7eb3d505e69fb730b70288590863f091a4c8a5964b2fee7f3e503ed5ef0b671b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7eb3d505e69fb730b70288590863f091a4c8a5964b2fee7f3e503ed5ef0b671b","first_computed_at":"2026-07-02T01:18:05.096445Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T01:18:05.096445Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"eZa1N70uvUYxsDJ2YIWZzpovZkcy+SjVHrl23WNx0Miy2XKbVxDSHIrkr8+h31aRNnJg8VFEs9GJ5vNWsFrcAg==","signature_status":"signed_v1","signed_at":"2026-07-02T01:18:05.096912Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.24636","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:25f87be2778e127148490cfc4e5df3f4cc9ea9f79e97161b8f2c0effb23c29bc","sha256:a61e678dab50d3683a4ebcdf83eedf028e78700e385f42c02f6a98ab5c4e2926"],"state_sha256":"542fd25b6a9ac2ea0c3370005bce7217c116a04c3d5c0c7bf27da5c6f82a0c21"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZN1ZQcBYVn7UW4FOtjJVXNi5oQS8qh6OsH6dnXFla73WW7umfrlLdnz8SNV1UwDdGd76HdOguVTGtDFvgk+fDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-03T11:10:21.989770Z","bundle_sha256":"ff41509549876c6e5c8f732d5bb52e80698e73bf4a0856180bea1e7fe7c9928f"}}