{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:GVFY2EOUK2EJMSWLD3JDOCT2PE","short_pith_number":"pith:GVFY2EOU","schema_version":"1.0","canonical_sha256":"354b8d11d45688964acb1ed2370a7a790fdb3853875d14663c3ad60186a0b20b","source":{"kind":"arxiv","id":"1703.09788","version":3},"attestation_state":"computed","paper":{"title":"Towards Automatic Learning of Procedures from Web Instructional Videos","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chenliang Xu, Jason J. Corso, Luowei Zhou","submitted_at":"2017-03-28T20:28:52Z","abstract_excerpt":"The potential for agents, whether embodied or software, to learn by observing other agents performing procedures involving objects and actions is rich. Current research on automatic procedure learning heavily relies on action labels or video subtitles, even during the evaluation phase, which makes them infeasible in real-world scenarios. This leads to our question: can the human-consensus structure of a procedure be learned from a large set of long, unconstrained videos (e.g., instructional videos from YouTube) with only visual evidence? To answer this question, we introduce the problem of pro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1703.09788","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-28T20:28:52Z","cross_cats_sorted":[],"title_canon_sha256":"78009b9adbb78ee93f1b1213775f825017b684142aba678bede006743c4fd751","abstract_canon_sha256":"6717943aec0f4ad55172171f98bcc3fd932c402f6f5b96a06ada0fcdff1c69c0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:51.778825Z","signature_b64":"fje4qA4VZ4rwbITq9Ggi1hbnwYLTWzFcU1c90kHHR5SJEikwR0oIAInuehXO2YgbB0YoGdJM4qPleTE0lCkoCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"354b8d11d45688964acb1ed2370a7a790fdb3853875d14663c3ad60186a0b20b","last_reissued_at":"2026-05-18T00:29:51.778282Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:51.778282Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Automatic Learning of Procedures from Web Instructional Videos","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chenliang Xu, Jason J. Corso, Luowei Zhou","submitted_at":"2017-03-28T20:28:52Z","abstract_excerpt":"The potential for agents, whether embodied or software, to learn by observing other agents performing procedures involving objects and actions is rich. Current research on automatic procedure learning heavily relies on action labels or video subtitles, even during the evaluation phase, which makes them infeasible in real-world scenarios. This leads to our question: can the human-consensus structure of a procedure be learned from a large set of long, unconstrained videos (e.g., instructional videos from YouTube) with only visual evidence? To answer this question, we introduce the problem of pro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.09788","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1703.09788","created_at":"2026-05-18T00:29:51.778375+00:00"},{"alias_kind":"arxiv_version","alias_value":"1703.09788v3","created_at":"2026-05-18T00:29:51.778375+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.09788","created_at":"2026-05-18T00:29:51.778375+00:00"},{"alias_kind":"pith_short_12","alias_value":"GVFY2EOUK2EJ","created_at":"2026-05-18T12:31:18.294218+00:00"},{"alias_kind":"pith_short_16","alias_value":"GVFY2EOUK2EJMSWL","created_at":"2026-05-18T12:31:18.294218+00:00"},{"alias_kind":"pith_short_8","alias_value":"GVFY2EOU","created_at":"2026-05-18T12:31:18.294218+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2309.16671","citing_title":"Demystifying CLIP Data","ref_index":184,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE","json":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE.json","graph_json":"https://pith.science/api/pith-number/GVFY2EOUK2EJMSWLD3JDOCT2PE/graph.json","events_json":"https://pith.science/api/pith-number/GVFY2EOUK2EJMSWLD3JDOCT2PE/events.json","paper":"https://pith.science/paper/GVFY2EOU"},"agent_actions":{"view_html":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE","download_json":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE.json","view_paper":"https://pith.science/paper/GVFY2EOU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1703.09788&json=true","fetch_graph":"https://pith.science/api/pith-number/GVFY2EOUK2EJMSWLD3JDOCT2PE/graph.json","fetch_events":"https://pith.science/api/pith-number/GVFY2EOUK2EJMSWLD3JDOCT2PE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE/action/storage_attestation","attest_author":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE/action/author_attestation","sign_citation":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE/action/citation_signature","submit_replication":"https://pith.science/pith/GVFY2EOUK2EJMSWLD3JDOCT2PE/action/replication_record"}},"created_at":"2026-05-18T00:29:51.778375+00:00","updated_at":"2026-05-18T00:29:51.778375+00:00"}