{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:TQMC7C3ZTOH53UI6BQLC525C7S","short_pith_number":"pith:TQMC7C3Z","schema_version":"1.0","canonical_sha256":"9c182f8b799b8fddd11e0c162eeba2fc9c684c9ca39d7f4674f6ff3c83525088","source":{"kind":"arxiv","id":"2511.15407","version":3},"attestation_state":"computed","paper":{"title":"IPR-1: Interactive Physical Reasoner","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An interactive physical reasoner learns causal physics from game play and surpasses GPT-5 overall.","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.AI","authors_text":"Guocan Xie, Jiting Cai, Lifeng Zhuo, Mingyu Zhang, Renjie Zhao, Tianxi Tan, Xian Nie, Yan Li, Yong-Lu Li, Ziyu Wang, Zizhu He","submitted_at":"2025-11-19T13:04:44Z","abstract_excerpt":"Humans learn by observing, interacting with environments, and internalizing physics and causality. Here, we aim to ask whether an agent can similarly acquire human-like reasoning from interaction and keep improving with more experience. To study this, we introduce a Game-to-Unseen (G2U) benchmark of 1,000+ heterogeneous games that exhibit significant visual domain gaps. Existing approaches, including VLMs and world models, struggle to capture underlying physics and causality since they are not focused on core mechanisms and overfit to visual details. VLM/VLA agents reason but lack look-ahead i"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2511.15407","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-11-19T13:04:44Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"db3f1667fba7e9743158fd5fde8fafc3be7a2cb8af83b51d4eb65032bf758297","abstract_canon_sha256":"e2416c25fff8345fc89728c0fe0b05185a418e8102d0379253dca303700d4fad"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:17.120579Z","signature_b64":"jGq7Z7EvnSjE0zyfHInXrjSxsZB/VMhI+r+o1agCzi58MecKDH1Jj5WPupvHmmxNoTjYQATXQTPpQ3nuRcLtCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9c182f8b799b8fddd11e0c162eeba2fc9c684c9ca39d7f4674f6ff3c83525088","last_reissued_at":"2026-05-17T23:39:17.119847Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:17.119847Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"IPR-1: Interactive Physical Reasoner","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"An interactive physical reasoner learns causal physics from game play and surpasses GPT-5 overall.","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.AI","authors_text":"Guocan Xie, Jiting Cai, Lifeng Zhuo, Mingyu Zhang, Renjie Zhao, Tianxi Tan, Xian Nie, Yan Li, Yong-Lu Li, Ziyu Wang, Zizhu He","submitted_at":"2025-11-19T13:04:44Z","abstract_excerpt":"Humans learn by observing, interacting with environments, and internalizing physics and causality. Here, we aim to ask whether an agent can similarly acquire human-like reasoning from interaction and keep improving with more experience. To study this, we introduce a Game-to-Unseen (G2U) benchmark of 1,000+ heterogeneous games that exhibit significant visual domain gaps. Existing approaches, including VLMs and world models, struggle to capture underlying physics and causality since they are not focused on core mechanisms and overfit to visual details. VLM/VLA agents reason but lack look-ahead i"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Pretrained on 1,000+ games, our IPR performs robustly on levels from primitive intuition to goal-driven reasoning, and even surpasses GPT-5 overall. We find that performance improves with more training games and interaction steps, and that the model also zero-shot transfers to unseen games.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That world-model rollouts capture true underlying physics and causality rather than visual patterns, and that the G2U benchmark's visual domain gaps and heterogeneous games sufficiently isolate core mechanisms from superficial appearance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"IPR uses world-model rollouts to reinforce a VLM policy via PhysCode on a 1000+ game benchmark, achieving robust physical reasoning that improves with experience and transfers zero-shot to unseen games while surpassing GPT-5.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"An interactive physical reasoner learns causal physics from game play and surpasses GPT-5 overall.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"ac49a7efe251abbd2f98510a9b7a9929a2f022c36627d81fe28d9c04db94c79f"},"source":{"id":"2511.15407","kind":"arxiv","version":3},"verdict":{"id":"835e6131-2c82-471c-9f04-e0aacecce2df","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T20:51:51.651115Z","strongest_claim":"Pretrained on 1,000+ games, our IPR performs robustly on levels from primitive intuition to goal-driven reasoning, and even surpasses GPT-5 overall. We find that performance improves with more training games and interaction steps, and that the model also zero-shot transfers to unseen games.","one_line_summary":"IPR uses world-model rollouts to reinforce a VLM policy via PhysCode on a 1000+ game benchmark, achieving robust physical reasoning that improves with experience and transfers zero-shot to unseen games while surpassing GPT-5.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That world-model rollouts capture true underlying physics and causality rather than visual patterns, and that the G2U benchmark's visual domain gaps and heterogeneous games sufficiently isolate core mechanisms from superficial appearance.","pith_extraction_headline":"An interactive physical reasoner learns causal physics from game play and surpasses GPT-5 overall."},"references":{"count":88,"sample":[{"doi":"","year":2022,"title":"Do as i can, not as i say: Grounding language in robotic affordances, 2022","work_id":"f20a3a19-c2e0-4d0e-badd-a3addf2c72e1","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Metric space magnitude and generalisation in neural networks","work_id":"dc228c2e-6619-4962-9a38-22e527a5d9af","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"V-jepa 2: Self-supervised video models enable understanding, prediction and planning, 2025","work_id":"372447af-92e0-43e5-85e8-ea20e394ed91","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"V-JEPA 2: Self-Supervised Video Models Enable Understanding, Prediction and Planning","work_id":"a9c28401-f16a-4933-89f0-788e2f94e52b","ref_index":4,"cited_arxiv_id":"2506.09985","is_internal_anchor":true},{"doi":"","year":2023,"title":"Qwen-VL: A Versatile Vision-Language Model for Understanding, Localization, Text Reading, and Beyond","work_id":"cbc2bb21-b6bb-46c0-80bf-107e195ffe10","ref_index":5,"cited_arxiv_id":"2308.12966","is_internal_anchor":true}],"resolved_work":88,"snapshot_sha256":"0cae011bce3789348e1622e0b897225b6a9aaf8c67017125a0091e1d1af23d7e","internal_anchors":11},"formal_canon":{"evidence_count":2,"snapshot_sha256":"bb44683e24fbd5cb9852da294ecbbf9ea99d987d47ac87aac0974a141a5907f9"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.15407","created_at":"2026-05-17T23:39:17.119966+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.15407v3","created_at":"2026-05-17T23:39:17.119966+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.15407","created_at":"2026-05-17T23:39:17.119966+00:00"},{"alias_kind":"pith_short_12","alias_value":"TQMC7C3ZTOH5","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"TQMC7C3ZTOH53UI6","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"TQMC7C3Z","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.09965","citing_title":"Towards Generalist Game Players: An Investigation of Foundation Models in the Game Multiverse","ref_index":215,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09965","citing_title":"Towards Generalist Game Players: An Investigation of Foundation Models in the Game Multiverse","ref_index":215,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S","json":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S.json","graph_json":"https://pith.science/api/pith-number/TQMC7C3ZTOH53UI6BQLC525C7S/graph.json","events_json":"https://pith.science/api/pith-number/TQMC7C3ZTOH53UI6BQLC525C7S/events.json","paper":"https://pith.science/paper/TQMC7C3Z"},"agent_actions":{"view_html":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S","download_json":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S.json","view_paper":"https://pith.science/paper/TQMC7C3Z","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.15407&json=true","fetch_graph":"https://pith.science/api/pith-number/TQMC7C3ZTOH53UI6BQLC525C7S/graph.json","fetch_events":"https://pith.science/api/pith-number/TQMC7C3ZTOH53UI6BQLC525C7S/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S/action/storage_attestation","attest_author":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S/action/author_attestation","sign_citation":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S/action/citation_signature","submit_replication":"https://pith.science/pith/TQMC7C3ZTOH53UI6BQLC525C7S/action/replication_record"}},"created_at":"2026-05-17T23:39:17.119966+00:00","updated_at":"2026-05-17T23:39:17.119966+00:00"}