{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:BF4S6654AXYNQRTABTOED35P75","short_pith_number":"pith:BF4S6654","canonical_record":{"source":{"id":"2508.06206","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-08-08T10:39:04Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"0d961a47fe42356b6afc94fe5f8ad41cd3877f2dd83f22bd7a975b64ddcfe849","abstract_canon_sha256":"f0fd336fe3d9bc8059b4480ebf62db307eacc3c5e3376a6b7d58645d1a2d4fa6"},"schema_version":"1.0"},"canonical_sha256":"09792f7bbc05f0d846600cdc41efafff6834f3c836aa21e98cfff82244b8febf","source":{"kind":"arxiv","id":"2508.06206","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.06206","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"arxiv_version","alias_value":"2508.06206v5","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.06206","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_12","alias_value":"BF4S6654AXYN","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_16","alias_value":"BF4S6654AXYNQRTA","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_8","alias_value":"BF4S6654","created_at":"2026-05-21T01:04:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:BF4S6654AXYNQRTABTOED35P75","target":"record","payload":{"canonical_record":{"source":{"id":"2508.06206","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-08-08T10:39:04Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"0d961a47fe42356b6afc94fe5f8ad41cd3877f2dd83f22bd7a975b64ddcfe849","abstract_canon_sha256":"f0fd336fe3d9bc8059b4480ebf62db307eacc3c5e3376a6b7d58645d1a2d4fa6"},"schema_version":"1.0"},"canonical_sha256":"09792f7bbc05f0d846600cdc41efafff6834f3c836aa21e98cfff82244b8febf","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:04:15.767187Z","signature_b64":"eCcZdoUyzy/bJ33oi3Vz6h6u1z3FTb6E9jSwQ1ZK6LaURg5YXeGTsvg61UnmlOBi82r3bgUi2UMnL/fDvDtzBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"09792f7bbc05f0d846600cdc41efafff6834f3c836aa21e98cfff82244b8febf","last_reissued_at":"2026-05-21T01:04:15.766254Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:04:15.766254Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2508.06206","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pNCRZKCRaG7NLy2cd1FArffLm8HIK9eT8foL7wM435jST27xwJZt3mCkbuxM/RdTmMgHEQ0DnnCLQkpgMfQMDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T16:15:40.445058Z"},"content_sha256":"4845af6713ca05f70b91f64733ac4982cd41d2db7781c8b706bd4ce957847ca0","schema_version":"1.0","event_id":"sha256:4845af6713ca05f70b91f64733ac4982cd41d2db7781c8b706bd4ce957847ca0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:BF4S6654AXYNQRTABTOED35P75","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Affordance-R1: Reinforcement Learning for Generalizable Affordance Reasoning in Multimodal Large Language Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding.","cross_cats":["cs.CV"],"primary_cat":"cs.RO","authors_text":"Hanqing Wang, Jiahao Yuan, Jiamin Wang, Mingyu Liu, Shaoyang Wang, Yifan Han, Yiming Zhong, Yuexin Ma, Zemin Yang, Zhiqing Cui","submitted_at":"2025-08-08T10:39:04Z","abstract_excerpt":"Affordance grounding focuses on predicting the specific regions of objects that are associated with the actions to be performed by robots. It plays a vital role in the fields of human-robot interaction, human-object interaction, embodied manipulation, and embodied perception. Existing models often neglect the affordance shared among different objects because they lack the Chain-of-Thought(CoT) reasoning abilities, limiting their out-of-domain (OOD) generalization and explicit reasoning capabilities. To address these challenges, we propose Affordance-R1, the first unified affordance grounding f"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Trained exclusively via reinforcement learning with GRPO and without explicit reasoning data, Affordance-R1 achieves robust zero-shot generalization and exhibits emergent test-time reasoning capabilities.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The custom affordance function containing format, perception, and cognition rewards will steer the GRPO optimization toward generalizable cognitive reasoning rather than overfitting to training distributions or reward specifics, as implied by the claim of emergent capabilities from RL-only training.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Affordance-R1 applies GRPO-based reinforcement learning to multimodal LLMs for affordance grounding, using format-perception-cognition rewards and the ReasonAff dataset to achieve zero-shot generalization and emergent reasoning.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"5c4795ebb6efb4dbe3db1a29f0426fa8d6c79a948ef217fecddc506e10f226d4"},"source":{"id":"2508.06206","kind":"arxiv","version":5},"verdict":{"id":"e266e6ef-ab41-4018-ac98-c477c555019a","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T00:40:01.472651Z","strongest_claim":"Trained exclusively via reinforcement learning with GRPO and without explicit reasoning data, Affordance-R1 achieves robust zero-shot generalization and exhibits emergent test-time reasoning capabilities.","one_line_summary":"Affordance-R1 applies GRPO-based reinforcement learning to multimodal LLMs for affordance grounding, using format-perception-cognition rewards and the ReasonAff dataset to achieve zero-shot generalization and emergent reasoning.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The custom affordance function containing format, perception, and cognition rewards will steer the GRPO optimization toward generalizable cognitive reasoning rather than overfitting to training distributions or reward specifics, as implied by the claim of emergent capabilities from RL-only training.","pith_extraction_headline":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.06206/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"495f7b31d2c55751f6cdbc90de91dae101e1f15dcbc7e25739547169d6310400"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"e266e6ef-ab41-4018-ac98-c477c555019a"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:04:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VheeaMCj2jaPF3wBbfSxtyZ3OJlMcmXMoHsT95l+sl3O88MCBlcAhoolPbTFRZtBPxiER2vSO97P25KJZl0RCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T16:15:40.445579Z"},"content_sha256":"ec55161d0e20ace055fd9404f2a1e39900acae9a71e5e0b99bd92423dd3f1982","schema_version":"1.0","event_id":"sha256:ec55161d0e20ace055fd9404f2a1e39900acae9a71e5e0b99bd92423dd3f1982"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BF4S6654AXYNQRTABTOED35P75/bundle.json","state_url":"https://pith.science/pith/BF4S6654AXYNQRTABTOED35P75/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BF4S6654AXYNQRTABTOED35P75/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T16:15:40Z","links":{"resolver":"https://pith.science/pith/BF4S6654AXYNQRTABTOED35P75","bundle":"https://pith.science/pith/BF4S6654AXYNQRTABTOED35P75/bundle.json","state":"https://pith.science/pith/BF4S6654AXYNQRTABTOED35P75/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BF4S6654AXYNQRTABTOED35P75/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:BF4S6654AXYNQRTABTOED35P75","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f0fd336fe3d9bc8059b4480ebf62db307eacc3c5e3376a6b7d58645d1a2d4fa6","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-08-08T10:39:04Z","title_canon_sha256":"0d961a47fe42356b6afc94fe5f8ad41cd3877f2dd83f22bd7a975b64ddcfe849"},"schema_version":"1.0","source":{"id":"2508.06206","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.06206","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"arxiv_version","alias_value":"2508.06206v5","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.06206","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_12","alias_value":"BF4S6654AXYN","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_16","alias_value":"BF4S6654AXYNQRTA","created_at":"2026-05-21T01:04:15Z"},{"alias_kind":"pith_short_8","alias_value":"BF4S6654","created_at":"2026-05-21T01:04:15Z"}],"graph_snapshots":[{"event_id":"sha256:ec55161d0e20ace055fd9404f2a1e39900acae9a71e5e0b99bd92423dd3f1982","target":"graph","created_at":"2026-05-21T01:04:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Trained exclusively via reinforcement learning with GRPO and without explicit reasoning data, Affordance-R1 achieves robust zero-shot generalization and exhibits emergent test-time reasoning capabilities."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The custom affordance function containing format, perception, and cognition rewards will steer the GRPO optimization toward generalizable cognitive reasoning rather than overfitting to training distributions or reward specifics, as implied by the claim of emergent capabilities from RL-only training."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Affordance-R1 applies GRPO-based reinforcement learning to multimodal LLMs for affordance grounding, using format-perception-cognition rewards and the ReasonAff dataset to achieve zero-shot generalization and emergent reasoning."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding."}],"snapshot_sha256":"5c4795ebb6efb4dbe3db1a29f0426fa8d6c79a948ef217fecddc506e10f226d4"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"495f7b31d2c55751f6cdbc90de91dae101e1f15dcbc7e25739547169d6310400"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2508.06206/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Affordance grounding focuses on predicting the specific regions of objects that are associated with the actions to be performed by robots. It plays a vital role in the fields of human-robot interaction, human-object interaction, embodied manipulation, and embodied perception. Existing models often neglect the affordance shared among different objects because they lack the Chain-of-Thought(CoT) reasoning abilities, limiting their out-of-domain (OOD) generalization and explicit reasoning capabilities. To address these challenges, we propose Affordance-R1, the first unified affordance grounding f","authors_text":"Hanqing Wang, Jiahao Yuan, Jiamin Wang, Mingyu Liu, Shaoyang Wang, Yifan Han, Yiming Zhong, Yuexin Ma, Zemin Yang, Zhiqing Cui","cross_cats":["cs.CV"],"headline":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-08-08T10:39:04Z","title":"Affordance-R1: Reinforcement Learning for Generalizable Affordance Reasoning in Multimodal Large Language Model"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.06206","kind":"arxiv","version":5},"verdict":{"created_at":"2026-05-19T00:40:01.472651Z","id":"e266e6ef-ab41-4018-ac98-c477c555019a","model_set":{"reader":"grok-4.3"},"one_line_summary":"Affordance-R1 applies GRPO-based reinforcement learning to multimodal LLMs for affordance grounding, using format-perception-cognition rewards and the ReasonAff dataset to achieve zero-shot generalization and emergent reasoning.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning via GRPO with a custom affordance reward function produces zero-shot generalization and emergent test-time reasoning in multimodal models for robot affordance grounding.","strongest_claim":"Trained exclusively via reinforcement learning with GRPO and without explicit reasoning data, Affordance-R1 achieves robust zero-shot generalization and exhibits emergent test-time reasoning capabilities.","weakest_assumption":"The custom affordance function containing format, perception, and cognition rewards will steer the GRPO optimization toward generalizable cognitive reasoning rather than overfitting to training distributions or reward specifics, as implied by the claim of emergent capabilities from RL-only training."}},"verdict_id":"e266e6ef-ab41-4018-ac98-c477c555019a"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4845af6713ca05f70b91f64733ac4982cd41d2db7781c8b706bd4ce957847ca0","target":"record","created_at":"2026-05-21T01:04:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f0fd336fe3d9bc8059b4480ebf62db307eacc3c5e3376a6b7d58645d1a2d4fa6","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-08-08T10:39:04Z","title_canon_sha256":"0d961a47fe42356b6afc94fe5f8ad41cd3877f2dd83f22bd7a975b64ddcfe849"},"schema_version":"1.0","source":{"id":"2508.06206","kind":"arxiv","version":5}},"canonical_sha256":"09792f7bbc05f0d846600cdc41efafff6834f3c836aa21e98cfff82244b8febf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"09792f7bbc05f0d846600cdc41efafff6834f3c836aa21e98cfff82244b8febf","first_computed_at":"2026-05-21T01:04:15.766254Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:04:15.766254Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"eCcZdoUyzy/bJ33oi3Vz6h6u1z3FTb6E9jSwQ1ZK6LaURg5YXeGTsvg61UnmlOBi82r3bgUi2UMnL/fDvDtzBQ==","signature_status":"signed_v1","signed_at":"2026-05-21T01:04:15.767187Z","signed_message":"canonical_sha256_bytes"},"source_id":"2508.06206","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4845af6713ca05f70b91f64733ac4982cd41d2db7781c8b706bd4ce957847ca0","sha256:ec55161d0e20ace055fd9404f2a1e39900acae9a71e5e0b99bd92423dd3f1982"],"state_sha256":"e492e13f97230d11fa57876653f274959956b0df777483931cbf68f617c0a070"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"O5fWgLcqXo7CxOBsKZPY7nY2m96AZMsu8jA8Rxezo3Nl7jO46cVCnvJHE4cpjhmcJ7okPwr/PofMVWABRxLBCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T16:15:40.447866Z","bundle_sha256":"31b279f1752f3b76a04726462844a72511e0ba3a261967046eed00ad8067010d"}}