{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:UO4SIWCYLDFC7TIGMNBGMUSOCG","short_pith_number":"pith:UO4SIWCY","schema_version":"1.0","canonical_sha256":"a3b924585858ca2fcd06634266524e11a31069ef39960beafc1728cadcbcc3fa","source":{"kind":"arxiv","id":"1809.10007","version":2},"attestation_state":"computed","paper":{"title":"Learning through Probing: a decentralized reinforcement learning architecture for social dilemmas","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","cs.LG"],"primary_cat":"cs.MA","authors_text":"Mirco Musolesi, Nicolas Anastassacos","submitted_at":"2018-09-26T14:10:13Z","abstract_excerpt":"Multi-agent reinforcement learning has received significant interest in recent years notably due to the advancements made in deep reinforcement learning which have allowed for the developments of new architectures and learning algorithms. Using social dilemmas as the training ground, we present a novel learning architecture, Learning through Probing (LTP), where agents utilize a probing mechanism to incorporate how their opponent's behavior changes when an agent takes an action. We use distinct training phases and adjust rewards according to the overall outcome of the experiences accounting fo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.10007","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.MA","submitted_at":"2018-09-26T14:10:13Z","cross_cats_sorted":["cs.AI","cs.GT","cs.LG"],"title_canon_sha256":"fcfbdc85078836d6817468105c91f9c08022e0e862701f3e1e3c0f5c12d4dd19","abstract_canon_sha256":"629f4edb56fb3f70f956bf494165475b1d68b1f1a1b54e8875720a60726c8a1a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:31.221616Z","signature_b64":"4oBD9akpzIWc6dullXJdOtBefkc+SM/d/MGzjsveP3eTnVywiaU1TI46FnMlvZaxs/ZGZxkTQyrgXI/5GdLICg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a3b924585858ca2fcd06634266524e11a31069ef39960beafc1728cadcbcc3fa","last_reissued_at":"2026-05-17T23:57:31.220936Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:31.220936Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning through Probing: a decentralized reinforcement learning architecture for social dilemmas","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","cs.LG"],"primary_cat":"cs.MA","authors_text":"Mirco Musolesi, Nicolas Anastassacos","submitted_at":"2018-09-26T14:10:13Z","abstract_excerpt":"Multi-agent reinforcement learning has received significant interest in recent years notably due to the advancements made in deep reinforcement learning which have allowed for the developments of new architectures and learning algorithms. Using social dilemmas as the training ground, we present a novel learning architecture, Learning through Probing (LTP), where agents utilize a probing mechanism to incorporate how their opponent's behavior changes when an agent takes an action. We use distinct training phases and adjust rewards according to the overall outcome of the experiences accounting fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.10007","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.10007","created_at":"2026-05-17T23:57:31.221025+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.10007v2","created_at":"2026-05-17T23:57:31.221025+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.10007","created_at":"2026-05-17T23:57:31.221025+00:00"},{"alias_kind":"pith_short_12","alias_value":"UO4SIWCYLDFC","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_16","alias_value":"UO4SIWCYLDFC7TIG","created_at":"2026-05-18T12:32:56.356000+00:00"},{"alias_kind":"pith_short_8","alias_value":"UO4SIWCY","created_at":"2026-05-18T12:32:56.356000+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG","json":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG.json","graph_json":"https://pith.science/api/pith-number/UO4SIWCYLDFC7TIGMNBGMUSOCG/graph.json","events_json":"https://pith.science/api/pith-number/UO4SIWCYLDFC7TIGMNBGMUSOCG/events.json","paper":"https://pith.science/paper/UO4SIWCY"},"agent_actions":{"view_html":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG","download_json":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG.json","view_paper":"https://pith.science/paper/UO4SIWCY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.10007&json=true","fetch_graph":"https://pith.science/api/pith-number/UO4SIWCYLDFC7TIGMNBGMUSOCG/graph.json","fetch_events":"https://pith.science/api/pith-number/UO4SIWCYLDFC7TIGMNBGMUSOCG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG/action/storage_attestation","attest_author":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG/action/author_attestation","sign_citation":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG/action/citation_signature","submit_replication":"https://pith.science/pith/UO4SIWCYLDFC7TIGMNBGMUSOCG/action/replication_record"}},"created_at":"2026-05-17T23:57:31.221025+00:00","updated_at":"2026-05-17T23:57:31.221025+00:00"}