{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:NLTTJ55ZTAZVFPVJBNFGDIAPZ3","short_pith_number":"pith:NLTTJ55Z","canonical_record":{"source":{"id":"1803.03021","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-08T10:02:42Z","cross_cats_sorted":[],"title_canon_sha256":"7f2790a4f953ebf65e1e25fbad664fe722ba3367b93c871fff9902563b843ad2","abstract_canon_sha256":"3d682c7a9f7b993df35ae7d9ff30d0642f55d22eee5d465286f7f6078bbf11c0"},"schema_version":"1.0"},"canonical_sha256":"6ae734f7b9983352bea90b4a61a00fcef5b52a085a0ff4b78e6e4d9b73d589a3","source":{"kind":"arxiv","id":"1803.03021","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.03021","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"arxiv_version","alias_value":"1803.03021v1","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.03021","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"pith_short_12","alias_value":"NLTTJ55ZTAZV","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NLTTJ55ZTAZVFPVJ","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NLTTJ55Z","created_at":"2026-05-18T12:32:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:NLTTJ55ZTAZVFPVJBNFGDIAPZ3","target":"record","payload":{"canonical_record":{"source":{"id":"1803.03021","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-08T10:02:42Z","cross_cats_sorted":[],"title_canon_sha256":"7f2790a4f953ebf65e1e25fbad664fe722ba3367b93c871fff9902563b843ad2","abstract_canon_sha256":"3d682c7a9f7b993df35ae7d9ff30d0642f55d22eee5d465286f7f6078bbf11c0"},"schema_version":"1.0"},"canonical_sha256":"6ae734f7b9983352bea90b4a61a00fcef5b52a085a0ff4b78e6e4d9b73d589a3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:21:44.665945Z","signature_b64":"ghbklJns3RDeLxbQ0S1tmCgB9aIZeKrLwkxT+SGx9fC69/4aDcn5wDiNhG0s8dkwtIABCV/m/ZM9AF3lsC0yBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6ae734f7b9983352bea90b4a61a00fcef5b52a085a0ff4b78e6e4d9b73d589a3","last_reissued_at":"2026-05-18T00:21:44.665465Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:21:44.665465Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.03021","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:21:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eYQERy5+DCCv77shYUdtDuybGKY/QHhDg6wo3YHXnNQJaPtiiZ47j6mkt8kTi3q1e9d8Q2Wj+ny33aHEHjILCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:38:23.000086Z"},"content_sha256":"9d91c310589a76cee680214cfc95494d65f35bd6cc54aa25951e3a43eaa29bf7","schema_version":"1.0","event_id":"sha256:9d91c310589a76cee680214cfc95494d65f35bd6cc54aa25951e3a43eaa29bf7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:NLTTJ55ZTAZVFPVJBNFGDIAPZ3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SA-IGA: A Multiagent Reinforcement Learning Method Towards Socially Optimal Outcomes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chengwei Zhang, Jianye Hao, Karl Tuyls, Siqi Chen, Wanli Xue, Xiaohong Li","submitted_at":"2018-03-08T10:02:42Z","abstract_excerpt":"In multiagent environments, the capability of learning is important for an agent to behave appropriately in face of unknown opponents and dynamic environment. From the system designer's perspective, it is desirable if the agents can learn to coordinate towards socially optimal outcomes, while also avoiding being exploited by selfish opponents. To this end, we propose a novel gradient ascent based algorithm (SA-IGA) which augments the basic gradient-ascent algorithm by incorporating social awareness into the policy update process. We theoretically analyze the learning dynamics of SA-IGA using d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.03021","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:21:44Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FlzT1cHy5av3q7Nb31c8ijqGmtmejTXvRxyZ6rKve3IlQ1TrxYzb3jaQXXClaum+IjNWTXCBNQS77i659GdZBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T23:38:23.000750Z"},"content_sha256":"8baa1286f91d2824fa651aa4a7e677002956aa60f8a462296cd1ac3529c52721","schema_version":"1.0","event_id":"sha256:8baa1286f91d2824fa651aa4a7e677002956aa60f8a462296cd1ac3529c52721"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/bundle.json","state_url":"https://pith.science/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T23:38:23Z","links":{"resolver":"https://pith.science/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3","bundle":"https://pith.science/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/bundle.json","state":"https://pith.science/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NLTTJ55ZTAZVFPVJBNFGDIAPZ3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:NLTTJ55ZTAZVFPVJBNFGDIAPZ3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3d682c7a9f7b993df35ae7d9ff30d0642f55d22eee5d465286f7f6078bbf11c0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-08T10:02:42Z","title_canon_sha256":"7f2790a4f953ebf65e1e25fbad664fe722ba3367b93c871fff9902563b843ad2"},"schema_version":"1.0","source":{"id":"1803.03021","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.03021","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"arxiv_version","alias_value":"1803.03021v1","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.03021","created_at":"2026-05-18T00:21:44Z"},{"alias_kind":"pith_short_12","alias_value":"NLTTJ55ZTAZV","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NLTTJ55ZTAZVFPVJ","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NLTTJ55Z","created_at":"2026-05-18T12:32:40Z"}],"graph_snapshots":[{"event_id":"sha256:8baa1286f91d2824fa651aa4a7e677002956aa60f8a462296cd1ac3529c52721","target":"graph","created_at":"2026-05-18T00:21:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In multiagent environments, the capability of learning is important for an agent to behave appropriately in face of unknown opponents and dynamic environment. From the system designer's perspective, it is desirable if the agents can learn to coordinate towards socially optimal outcomes, while also avoiding being exploited by selfish opponents. To this end, we propose a novel gradient ascent based algorithm (SA-IGA) which augments the basic gradient-ascent algorithm by incorporating social awareness into the policy update process. We theoretically analyze the learning dynamics of SA-IGA using d","authors_text":"Chengwei Zhang, Jianye Hao, Karl Tuyls, Siqi Chen, Wanli Xue, Xiaohong Li","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-08T10:02:42Z","title":"SA-IGA: A Multiagent Reinforcement Learning Method Towards Socially Optimal Outcomes"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.03021","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9d91c310589a76cee680214cfc95494d65f35bd6cc54aa25951e3a43eaa29bf7","target":"record","created_at":"2026-05-18T00:21:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3d682c7a9f7b993df35ae7d9ff30d0642f55d22eee5d465286f7f6078bbf11c0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-08T10:02:42Z","title_canon_sha256":"7f2790a4f953ebf65e1e25fbad664fe722ba3367b93c871fff9902563b843ad2"},"schema_version":"1.0","source":{"id":"1803.03021","kind":"arxiv","version":1}},"canonical_sha256":"6ae734f7b9983352bea90b4a61a00fcef5b52a085a0ff4b78e6e4d9b73d589a3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6ae734f7b9983352bea90b4a61a00fcef5b52a085a0ff4b78e6e4d9b73d589a3","first_computed_at":"2026-05-18T00:21:44.665465Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:21:44.665465Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ghbklJns3RDeLxbQ0S1tmCgB9aIZeKrLwkxT+SGx9fC69/4aDcn5wDiNhG0s8dkwtIABCV/m/ZM9AF3lsC0yBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:21:44.665945Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.03021","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9d91c310589a76cee680214cfc95494d65f35bd6cc54aa25951e3a43eaa29bf7","sha256:8baa1286f91d2824fa651aa4a7e677002956aa60f8a462296cd1ac3529c52721"],"state_sha256":"d21fe5bf2abbc2b23215d319935f84f1eb78bb8786000076826ed8022ce8ad59"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ztq3CHAdUHbb+LIIkmsgG7wLDdCeFTZ+ZNZWvcwlnxMcUU8FG/DvyoT7+JX2dR+M8X478EBTtiv4HLVjOrw9Ag==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T23:38:23.004238Z","bundle_sha256":"2cedf496909a52e6d72bd58e866c8d12fd7dfb21262f4aa2a6edf5e6bd9fb131"}}