{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:XQWV44UTEW6X3RZFLYSKJNKSGM","short_pith_number":"pith:XQWV44UT","canonical_record":{"source":{"id":"2603.19310","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-13T18:00:16Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c96b6bf22dd94d4c87a4123def10db362c77c9cfd618973ae3f9ae1fc870ed46","abstract_canon_sha256":"78805bc1da25a5cc04824d3713062c40018b422e73155fef7a327178e4e0f5b0"},"schema_version":"1.0"},"canonical_sha256":"bc2d5e729325bd7dc7255e24a4b5523322d100fc03a1c50dce3603a60d57b3f1","source":{"kind":"arxiv","id":"2603.19310","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.19310","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"arxiv_version","alias_value":"2603.19310v3","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.19310","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_12","alias_value":"XQWV44UTEW6X","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_16","alias_value":"XQWV44UTEW6X3RZF","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_8","alias_value":"XQWV44UT","created_at":"2026-05-22T01:04:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:XQWV44UTEW6X3RZFLYSKJNKSGM","target":"record","payload":{"canonical_record":{"source":{"id":"2603.19310","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-13T18:00:16Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c96b6bf22dd94d4c87a4123def10db362c77c9cfd618973ae3f9ae1fc870ed46","abstract_canon_sha256":"78805bc1da25a5cc04824d3713062c40018b422e73155fef7a327178e4e0f5b0"},"schema_version":"1.0"},"canonical_sha256":"bc2d5e729325bd7dc7255e24a4b5523322d100fc03a1c50dce3603a60d57b3f1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:00.559042Z","signature_b64":"mcf4TPV6wA74Tv49icFRrNbrTMQ++BKpD7zXzftvLtMtRBBEB25L17pyaR3XLzyrYOhg7rJ98Ck8d7orcUjuDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bc2d5e729325bd7dc7255e24a4b5523322d100fc03a1c50dce3603a60d57b3f1","last_reissued_at":"2026-05-22T01:04:00.558192Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:00.558192Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.19310","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3uwxMABHhoLgGPm6BNMMqBATNJo0bHyP2wJ1hZoi8day6dXH7NlTe+DyaAmNP2P5vXMzYKPLIz0FB1ulRCwEDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T20:39:28.155794Z"},"content_sha256":"a317195cc7eba1cfbb39803982c1825d4ab85db0f8490c4e125b03391f8cbfe6","schema_version":"1.0","event_id":"sha256:a317195cc7eba1cfbb39803982c1825d4ab85db0f8490c4e125b03391f8cbfe6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:XQWV44UTEW6X3RZFLYSKJNKSGM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MemReward: Graph-Based Experience Memory for LLM Reward Prediction with Limited Labels","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Ge Liu, Jiaxuan You, Shuang Yang, Tao Feng, Tianyang Luo, Yan Xie, Zhigang Hua","submitted_at":"2026-03-13T18:00:16Z","abstract_excerpt":"Reinforcement learning has emerged as a powerful paradigm for improving large language model (LLM) reasoning, where rollouts are sampled from the policy and reward signals computed on those rollouts are used to update the policy. However, in data-scarce scenarios, obtaining ground-truth labels to verify rollouts at scale often requires expensive human annotation or labor-intensive expert verification. For instance, evaluating mathematical proofs demands expert review, and open-ended question answering lacks definitive ground truth. When ground-truth labels are scarce, the effectiveness of rein"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.19310","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.19310/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m2Xl34ep6cIegxYRLY+30nDD8LynvAjz8DoNrKXsEDZ44dQLoN6uOhPPYKahLqIrJ8h/xqW/IlGj3bp7jl1cAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T20:39:28.156194Z"},"content_sha256":"ba2c143b9bb6e3bfb99adc4e28a0de4b43ad4e6c410f7cee2968cdaf9d8ea3cc","schema_version":"1.0","event_id":"sha256:ba2c143b9bb6e3bfb99adc4e28a0de4b43ad4e6c410f7cee2968cdaf9d8ea3cc"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/bundle.json","state_url":"https://pith.science/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T20:39:28Z","links":{"resolver":"https://pith.science/pith/XQWV44UTEW6X3RZFLYSKJNKSGM","bundle":"https://pith.science/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/bundle.json","state":"https://pith.science/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XQWV44UTEW6X3RZFLYSKJNKSGM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XQWV44UTEW6X3RZFLYSKJNKSGM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"78805bc1da25a5cc04824d3713062c40018b422e73155fef7a327178e4e0f5b0","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-13T18:00:16Z","title_canon_sha256":"c96b6bf22dd94d4c87a4123def10db362c77c9cfd618973ae3f9ae1fc870ed46"},"schema_version":"1.0","source":{"id":"2603.19310","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.19310","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"arxiv_version","alias_value":"2603.19310v3","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.19310","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_12","alias_value":"XQWV44UTEW6X","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_16","alias_value":"XQWV44UTEW6X3RZF","created_at":"2026-05-22T01:04:00Z"},{"alias_kind":"pith_short_8","alias_value":"XQWV44UT","created_at":"2026-05-22T01:04:00Z"}],"graph_snapshots":[{"event_id":"sha256:ba2c143b9bb6e3bfb99adc4e28a0de4b43ad4e6c410f7cee2968cdaf9d8ea3cc","target":"graph","created_at":"2026-05-22T01:04:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.19310/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning has emerged as a powerful paradigm for improving large language model (LLM) reasoning, where rollouts are sampled from the policy and reward signals computed on those rollouts are used to update the policy. However, in data-scarce scenarios, obtaining ground-truth labels to verify rollouts at scale often requires expensive human annotation or labor-intensive expert verification. For instance, evaluating mathematical proofs demands expert review, and open-ended question answering lacks definitive ground truth. When ground-truth labels are scarce, the effectiveness of rein","authors_text":"Ge Liu, Jiaxuan You, Shuang Yang, Tao Feng, Tianyang Luo, Yan Xie, Zhigang Hua","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-13T18:00:16Z","title":"MemReward: Graph-Based Experience Memory for LLM Reward Prediction with Limited Labels"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.19310","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a317195cc7eba1cfbb39803982c1825d4ab85db0f8490c4e125b03391f8cbfe6","target":"record","created_at":"2026-05-22T01:04:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"78805bc1da25a5cc04824d3713062c40018b422e73155fef7a327178e4e0f5b0","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-03-13T18:00:16Z","title_canon_sha256":"c96b6bf22dd94d4c87a4123def10db362c77c9cfd618973ae3f9ae1fc870ed46"},"schema_version":"1.0","source":{"id":"2603.19310","kind":"arxiv","version":3}},"canonical_sha256":"bc2d5e729325bd7dc7255e24a4b5523322d100fc03a1c50dce3603a60d57b3f1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bc2d5e729325bd7dc7255e24a4b5523322d100fc03a1c50dce3603a60d57b3f1","first_computed_at":"2026-05-22T01:04:00.558192Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:00.558192Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mcf4TPV6wA74Tv49icFRrNbrTMQ++BKpD7zXzftvLtMtRBBEB25L17pyaR3XLzyrYOhg7rJ98Ck8d7orcUjuDw==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:00.559042Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.19310","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a317195cc7eba1cfbb39803982c1825d4ab85db0f8490c4e125b03391f8cbfe6","sha256:ba2c143b9bb6e3bfb99adc4e28a0de4b43ad4e6c410f7cee2968cdaf9d8ea3cc"],"state_sha256":"aee75f99b8df9237d80f699f507cfd49d57c1c81ad72ae0ff75a5c95d5dc9fac"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W/vacmSgOdf/ymwH5KuZcmnYW1x0jbTJncKrNFHsfI4F/d57Xritr4QyzLdlkMRoN5I8o/dizAXA3Wm6uu4UBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T20:39:28.158350Z","bundle_sha256":"16a3efefff1b5790f5ecb2440af03ed9ef56c440ffba4244c7dda2d7250dfd00"}}