{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:QZT3SBDZJPXHXSSGASVBHCM5AS","short_pith_number":"pith:QZT3SBDZ","canonical_record":{"source":{"id":"1901.09207","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-26T13:08:08Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1d0385f833fd58f96668885000bcd77b5cb781d04e3104083e1183239e30f5f2","abstract_canon_sha256":"87ddc4851ea396a287ae4363fd0e80862034b2e6ac08bc3d9ac1f5e2e71b0376"},"schema_version":"1.0"},"canonical_sha256":"8667b904794bee7bca4604aa13899d049d510857eb94be9987cb0be9b97b1d6d","source":{"kind":"arxiv","id":"1901.09207","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09207","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09207v2","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09207","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"pith_short_12","alias_value":"QZT3SBDZJPXH","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"QZT3SBDZJPXHXSSG","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"QZT3SBDZ","created_at":"2026-05-18T12:33:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:QZT3SBDZJPXHXSSGASVBHCM5AS","target":"record","payload":{"canonical_record":{"source":{"id":"1901.09207","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-26T13:08:08Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"1d0385f833fd58f96668885000bcd77b5cb781d04e3104083e1183239e30f5f2","abstract_canon_sha256":"87ddc4851ea396a287ae4363fd0e80862034b2e6ac08bc3d9ac1f5e2e71b0376"},"schema_version":"1.0"},"canonical_sha256":"8667b904794bee7bca4604aa13899d049d510857eb94be9987cb0be9b97b1d6d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:52:21.530547Z","signature_b64":"URvYFKN3oVJuG6xvy8XbH7Jrsuyq55eEHcNCw8ITDAqRenvE7Xd7bBzs8PVHbKeKqtzgqHvQNrt9rCCW/LwNAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8667b904794bee7bca4604aa13899d049d510857eb94be9987cb0be9b97b1d6d","last_reissued_at":"2026-05-17T23:52:21.529914Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:52:21.529914Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.09207","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:52:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mMuUZXn2gs+MJUaU7zoGSWU2VEDPhBiGFBvZTuCKZyLCDBejjV7BZBN3m9N3SezrUgdo6XI5ZKdc2J5+Q8W5AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T19:58:55.659195Z"},"content_sha256":"6a2ebbbb41a449b934f22450ac72d2c32176c66f9cc6e7d0c985fcf4b37b241e","schema_version":"1.0","event_id":"sha256:6a2ebbbb41a449b934f22450ac72d2c32176c66f9cc6e7d0c985fcf4b37b241e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:QZT3SBDZJPXHXSSGASVBHCM5AS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Probabilistic Recursive Reasoning for Multi-Agent Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jun Wang, Rui Luo, Wei Pan, Yaodong Yang, Ying Wen","submitted_at":"2019-01-26T13:08:08Z","abstract_excerpt":"Humans are capable of attributing latent mental contents such as beliefs or intentions to others. The social skill is critical in daily life for reasoning about the potential consequences of others' behaviors so as to plan ahead. It is known that humans use such reasoning ability recursively by considering what others believe about their own beliefs. In this paper, we start from level-$1$ recursion and introduce a probabilistic recursive reasoning (PR2) framework for multi-agent reinforcement learning. Our hypothesis is that it is beneficial for each agent to account for how the opponents woul"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09207","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:52:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gpodEsXcbpbdHksS8ZaWkITF6IXZHFYsHhTdTg4kuJ3yiIp0MulAEuZb9qmpHaxAbbnZStbsT/csj5RiACjzCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T19:58:55.659884Z"},"content_sha256":"aa550b76c5983d60c2c295dfbb03e0ba6784bb6a4e1d70f60cd9d3466fdef393","schema_version":"1.0","event_id":"sha256:aa550b76c5983d60c2c295dfbb03e0ba6784bb6a4e1d70f60cd9d3466fdef393"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/bundle.json","state_url":"https://pith.science/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T19:58:55Z","links":{"resolver":"https://pith.science/pith/QZT3SBDZJPXHXSSGASVBHCM5AS","bundle":"https://pith.science/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/bundle.json","state":"https://pith.science/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QZT3SBDZJPXHXSSGASVBHCM5AS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:QZT3SBDZJPXHXSSGASVBHCM5AS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"87ddc4851ea396a287ae4363fd0e80862034b2e6ac08bc3d9ac1f5e2e71b0376","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-26T13:08:08Z","title_canon_sha256":"1d0385f833fd58f96668885000bcd77b5cb781d04e3104083e1183239e30f5f2"},"schema_version":"1.0","source":{"id":"1901.09207","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.09207","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"arxiv_version","alias_value":"1901.09207v2","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.09207","created_at":"2026-05-17T23:52:21Z"},{"alias_kind":"pith_short_12","alias_value":"QZT3SBDZJPXH","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_16","alias_value":"QZT3SBDZJPXHXSSG","created_at":"2026-05-18T12:33:27Z"},{"alias_kind":"pith_short_8","alias_value":"QZT3SBDZ","created_at":"2026-05-18T12:33:27Z"}],"graph_snapshots":[{"event_id":"sha256:aa550b76c5983d60c2c295dfbb03e0ba6784bb6a4e1d70f60cd9d3466fdef393","target":"graph","created_at":"2026-05-17T23:52:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Humans are capable of attributing latent mental contents such as beliefs or intentions to others. The social skill is critical in daily life for reasoning about the potential consequences of others' behaviors so as to plan ahead. It is known that humans use such reasoning ability recursively by considering what others believe about their own beliefs. In this paper, we start from level-$1$ recursion and introduce a probabilistic recursive reasoning (PR2) framework for multi-agent reinforcement learning. Our hypothesis is that it is beneficial for each agent to account for how the opponents woul","authors_text":"Jun Wang, Rui Luo, Wei Pan, Yaodong Yang, Ying Wen","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-26T13:08:08Z","title":"Probabilistic Recursive Reasoning for Multi-Agent Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.09207","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6a2ebbbb41a449b934f22450ac72d2c32176c66f9cc6e7d0c985fcf4b37b241e","target":"record","created_at":"2026-05-17T23:52:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"87ddc4851ea396a287ae4363fd0e80862034b2e6ac08bc3d9ac1f5e2e71b0376","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-26T13:08:08Z","title_canon_sha256":"1d0385f833fd58f96668885000bcd77b5cb781d04e3104083e1183239e30f5f2"},"schema_version":"1.0","source":{"id":"1901.09207","kind":"arxiv","version":2}},"canonical_sha256":"8667b904794bee7bca4604aa13899d049d510857eb94be9987cb0be9b97b1d6d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8667b904794bee7bca4604aa13899d049d510857eb94be9987cb0be9b97b1d6d","first_computed_at":"2026-05-17T23:52:21.529914Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:52:21.529914Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"URvYFKN3oVJuG6xvy8XbH7Jrsuyq55eEHcNCw8ITDAqRenvE7Xd7bBzs8PVHbKeKqtzgqHvQNrt9rCCW/LwNAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:52:21.530547Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.09207","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6a2ebbbb41a449b934f22450ac72d2c32176c66f9cc6e7d0c985fcf4b37b241e","sha256:aa550b76c5983d60c2c295dfbb03e0ba6784bb6a4e1d70f60cd9d3466fdef393"],"state_sha256":"9327ba02061398636a361b6b1249648132846d8f3dc72fce018d72b9e99799b6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m1Az865ukhqMlO0mwluMasfu7tHS1B2YFL6if50zk4Fk92EE1OdOA7eXQ1+B06Lyfpu1/dpLQPWEcI5MVZmVDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T19:58:55.663587Z","bundle_sha256":"2ab87f5cda5c7de5ee94ba28bee0a884f87fb64f05fbce7e8a3b7b5fb58149e0"}}