{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:QMTYAAIGA6UZQZENXJUAGWIB24","short_pith_number":"pith:QMTYAAIG","schema_version":"1.0","canonical_sha256":"832780010607a998648dba68035901d7053d4e78e23ffd53b94684aae7c63df9","source":{"kind":"arxiv","id":"2510.07650","version":4},"attestation_state":"computed","paper":{"title":"Value Flows","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Benjamin Eysenbach, Chelsea Finn, Chongyi Zheng, Dorsa Sadigh, Perry Dong","submitted_at":"2025-10-09T00:57:40Z","abstract_excerpt":"While most reinforcement learning methods today flatten the distribution of future returns to a single scalar value, distributional RL methods exploit the return distribution to provide stronger learning signals and to enable applications in exploration and safe RL. While the predominant method for estimating the return distribution is by modeling it as a categorical distribution over discrete bins or estimating a finite number of quantiles, such approaches leave unanswered questions about the fine-grained structure of the return distribution and about how to distinguish states with high retur"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.07650","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-10-09T00:57:40Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"ef3979cd93466ed9181db497ea8fc2d1aa7b86b0fc6051b40bacc748f0efa90b","abstract_canon_sha256":"19c8426b8645d6a3b0b6d0ab9490ffcbbc8663c17c623ef467df6a0692595a31"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:35.096778Z","signature_b64":"rqh3qYT9K0KjiCxlF00BTK8Xx177YVPA5W1Fb+c+Wgr57kHHoPJlyRM6AYJqGxIaJp0F4S1yW9J3CHnNoqxQDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"832780010607a998648dba68035901d7053d4e78e23ffd53b94684aae7c63df9","last_reissued_at":"2026-06-02T01:03:35.096210Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:35.096210Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Value Flows","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Benjamin Eysenbach, Chelsea Finn, Chongyi Zheng, Dorsa Sadigh, Perry Dong","submitted_at":"2025-10-09T00:57:40Z","abstract_excerpt":"While most reinforcement learning methods today flatten the distribution of future returns to a single scalar value, distributional RL methods exploit the return distribution to provide stronger learning signals and to enable applications in exploration and safe RL. While the predominant method for estimating the return distribution is by modeling it as a categorical distribution over discrete bins or estimating a finite number of quantiles, such approaches leave unanswered questions about the fine-grained structure of the return distribution and about how to distinguish states with high retur"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.07650","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.07650/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.07650","created_at":"2026-06-02T01:03:35.096282+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.07650v4","created_at":"2026-06-02T01:03:35.096282+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.07650","created_at":"2026-06-02T01:03:35.096282+00:00"},{"alias_kind":"pith_short_12","alias_value":"QMTYAAIGA6UZ","created_at":"2026-06-02T01:03:35.096282+00:00"},{"alias_kind":"pith_short_16","alias_value":"QMTYAAIGA6UZQZEN","created_at":"2026-06-02T01:03:35.096282+00:00"},{"alias_kind":"pith_short_8","alias_value":"QMTYAAIG","created_at":"2026-06-02T01:03:35.096282+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2603.04333","citing_title":"What Does Flow Matching Bring To TD Learning?","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01663","citing_title":"Towards Efficient and Expressive Offline RL via Flow-Anchored Noise-conditioned Q-Learning","ref_index":75,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14265","citing_title":"Reinforcement Learning via Value Gradient Flow","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19730","citing_title":"FASTER: Value-Guided Sampling for Fast RL","ref_index":7,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24","json":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24.json","graph_json":"https://pith.science/api/pith-number/QMTYAAIGA6UZQZENXJUAGWIB24/graph.json","events_json":"https://pith.science/api/pith-number/QMTYAAIGA6UZQZENXJUAGWIB24/events.json","paper":"https://pith.science/paper/QMTYAAIG"},"agent_actions":{"view_html":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24","download_json":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24.json","view_paper":"https://pith.science/paper/QMTYAAIG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.07650&json=true","fetch_graph":"https://pith.science/api/pith-number/QMTYAAIGA6UZQZENXJUAGWIB24/graph.json","fetch_events":"https://pith.science/api/pith-number/QMTYAAIGA6UZQZENXJUAGWIB24/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24/action/storage_attestation","attest_author":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24/action/author_attestation","sign_citation":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24/action/citation_signature","submit_replication":"https://pith.science/pith/QMTYAAIGA6UZQZENXJUAGWIB24/action/replication_record"}},"created_at":"2026-06-02T01:03:35.096282+00:00","updated_at":"2026-06-02T01:03:35.096282+00:00"}