{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:6J4O2XQBDV43OMQ52ZULLKYGPS","short_pith_number":"pith:6J4O2XQB","schema_version":"1.0","canonical_sha256":"f278ed5e011d79b7321dd668b5ab067caee8d3f43f236ddcdf9a22ccf1a031b4","source":{"kind":"arxiv","id":"2606.18963","version":1},"attestation_state":"computed","paper":{"title":"Online Reward-Punishment Learning from Fixed-Channel Perceptual Event Streams without Environment Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Zirong Li","submitted_at":"2026-06-17T11:43:10Z","abstract_excerpt":"We study online reward-punishment learning when the environment provides no scalar reward or evaluative label. At each step the agent receives only a fixed-channel perceptual packet, and quantities such as pain, energy, contact, damage, or cognitive error are treated as perceptual dimensions whose valence must be inferred from transition consequences. OHIRL separates four roles: M_psi learns next-packet prediction, D_omega models residual dynamics, C_eta is a fixed internal post-transition trajectory evaluator, and B_xi learns to use the resulting value evidence for later policy updates and ac"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.18963","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-17T11:43:10Z","cross_cats_sorted":[],"title_canon_sha256":"404c731a965ababbb56880d630b827e301d19c3eeb4e2ff8b8565ebd253cb01b","abstract_canon_sha256":"4aef99bbe3a8c7ac17294b2e54db1aebb55c8bed06c977346ee34f76ec587fec"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:11:53.222161Z","signature_b64":"lksS+CLVklnV9Gp1tp53I0LI+GUSIktI5/mPwyNqrf4FM64llxxmeiGSLpgGzmtB2RnAE5ufDwKB/sadOpZXBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f278ed5e011d79b7321dd668b5ab067caee8d3f43f236ddcdf9a22ccf1a031b4","last_reissued_at":"2026-06-19T16:11:53.221681Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:11:53.221681Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Online Reward-Punishment Learning from Fixed-Channel Perceptual Event Streams without Environment Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Zirong Li","submitted_at":"2026-06-17T11:43:10Z","abstract_excerpt":"We study online reward-punishment learning when the environment provides no scalar reward or evaluative label. At each step the agent receives only a fixed-channel perceptual packet, and quantities such as pain, energy, contact, damage, or cognitive error are treated as perceptual dimensions whose valence must be inferred from transition consequences. OHIRL separates four roles: M_psi learns next-packet prediction, D_omega models residual dynamics, C_eta is a fixed internal post-transition trajectory evaluator, and B_xi learns to use the resulting value evidence for later policy updates and ac"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18963","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.18963/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.18963","created_at":"2026-06-19T16:11:53.221740+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.18963v1","created_at":"2026-06-19T16:11:53.221740+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18963","created_at":"2026-06-19T16:11:53.221740+00:00"},{"alias_kind":"pith_short_12","alias_value":"6J4O2XQBDV43","created_at":"2026-06-19T16:11:53.221740+00:00"},{"alias_kind":"pith_short_16","alias_value":"6J4O2XQBDV43OMQ5","created_at":"2026-06-19T16:11:53.221740+00:00"},{"alias_kind":"pith_short_8","alias_value":"6J4O2XQB","created_at":"2026-06-19T16:11:53.221740+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS","json":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS.json","graph_json":"https://pith.science/api/pith-number/6J4O2XQBDV43OMQ52ZULLKYGPS/graph.json","events_json":"https://pith.science/api/pith-number/6J4O2XQBDV43OMQ52ZULLKYGPS/events.json","paper":"https://pith.science/paper/6J4O2XQB"},"agent_actions":{"view_html":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS","download_json":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS.json","view_paper":"https://pith.science/paper/6J4O2XQB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.18963&json=true","fetch_graph":"https://pith.science/api/pith-number/6J4O2XQBDV43OMQ52ZULLKYGPS/graph.json","fetch_events":"https://pith.science/api/pith-number/6J4O2XQBDV43OMQ52ZULLKYGPS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS/action/storage_attestation","attest_author":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS/action/author_attestation","sign_citation":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS/action/citation_signature","submit_replication":"https://pith.science/pith/6J4O2XQBDV43OMQ52ZULLKYGPS/action/replication_record"}},"created_at":"2026-06-19T16:11:53.221740+00:00","updated_at":"2026-06-19T16:11:53.221740+00:00"}