{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:GMBD7KOERXIVKEXNC4ICRIOPJJ","short_pith_number":"pith:GMBD7KOE","schema_version":"1.0","canonical_sha256":"33023fa9c48dd15512ed171028a1cf4a443ee426aded3e98d8133f032019066e","source":{"kind":"arxiv","id":"2511.18127","version":2},"attestation_state":"computed","paper":{"title":"SFHand: Learning Embodied Manipulation by Streaming Egocentric 3D Hand Forecasting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Caixin Kang, Liangyang Ouyang, Ruicong Liu, Yifei Huang, Yoichi Sato","submitted_at":"2025-11-22T17:22:24Z","abstract_excerpt":"Real-time 3D hand forecasting is a critical component for fluid human-computer interaction in applications like AR and assistive robotics. However, existing methods are ill-suited for these scenarios, as they typically require offline access to accumulated video sequences and cannot incorporate language guidance that conveys task intent. To overcome these limitations, we introduce SFHand, the first streaming framework for language-guided 3D hand forecasting. SFHand autoregressively predicts a comprehensive set of future 3D hand states, including hand type, 2D bounding box, 3D pose, and traject"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.18127","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-11-22T17:22:24Z","cross_cats_sorted":[],"title_canon_sha256":"df9f510c6b960f55d11c709e578dbe9063b3c8b6409d6fde3aff769396582f0a","abstract_canon_sha256":"37dc923b2fbcfd12d33f3f009a3970037e404bf1fde91ed7274b00785577ff46"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:28.338579Z","signature_b64":"VuGgxWTwC7EgmiiUK2t34RoxzStXgOI61tHsCXB+7h6EvUfxyEB9+GDGs7/RTNRonKCdzmkdGqFWMCexebUsAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"33023fa9c48dd15512ed171028a1cf4a443ee426aded3e98d8133f032019066e","last_reissued_at":"2026-05-20T00:00:28.337871Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:28.337871Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SFHand: Learning Embodied Manipulation by Streaming Egocentric 3D Hand Forecasting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Caixin Kang, Liangyang Ouyang, Ruicong Liu, Yifei Huang, Yoichi Sato","submitted_at":"2025-11-22T17:22:24Z","abstract_excerpt":"Real-time 3D hand forecasting is a critical component for fluid human-computer interaction in applications like AR and assistive robotics. However, existing methods are ill-suited for these scenarios, as they typically require offline access to accumulated video sequences and cannot incorporate language guidance that conveys task intent. To overcome these limitations, we introduce SFHand, the first streaming framework for language-guided 3D hand forecasting. SFHand autoregressively predicts a comprehensive set of future 3D hand states, including hand type, 2D bounding box, 3D pose, and traject"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.18127","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.18127/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.18127","created_at":"2026-05-20T00:00:28.337987+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.18127v2","created_at":"2026-05-20T00:00:28.337987+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.18127","created_at":"2026-05-20T00:00:28.337987+00:00"},{"alias_kind":"pith_short_12","alias_value":"GMBD7KOERXIV","created_at":"2026-05-20T00:00:28.337987+00:00"},{"alias_kind":"pith_short_16","alias_value":"GMBD7KOERXIVKEXN","created_at":"2026-05-20T00:00:28.337987+00:00"},{"alias_kind":"pith_short_8","alias_value":"GMBD7KOE","created_at":"2026-05-20T00:00:28.337987+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.22109","citing_title":"Perception or Prejudice: Can MLLMs Go Beyond First Impressions of Personality?","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10079","citing_title":"SocialDirector: Training-Free Social Interaction Control for Multi-Person Video Generation","ref_index":38,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ","json":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ.json","graph_json":"https://pith.science/api/pith-number/GMBD7KOERXIVKEXNC4ICRIOPJJ/graph.json","events_json":"https://pith.science/api/pith-number/GMBD7KOERXIVKEXNC4ICRIOPJJ/events.json","paper":"https://pith.science/paper/GMBD7KOE"},"agent_actions":{"view_html":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ","download_json":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ.json","view_paper":"https://pith.science/paper/GMBD7KOE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.18127&json=true","fetch_graph":"https://pith.science/api/pith-number/GMBD7KOERXIVKEXNC4ICRIOPJJ/graph.json","fetch_events":"https://pith.science/api/pith-number/GMBD7KOERXIVKEXNC4ICRIOPJJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ/action/storage_attestation","attest_author":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ/action/author_attestation","sign_citation":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ/action/citation_signature","submit_replication":"https://pith.science/pith/GMBD7KOERXIVKEXNC4ICRIOPJJ/action/replication_record"}},"created_at":"2026-05-20T00:00:28.337987+00:00","updated_at":"2026-05-20T00:00:28.337987+00:00"}