{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:UROFS6QKTGHMH7S337EJB2OTY3","short_pith_number":"pith:UROFS6QK","canonical_record":{"source":{"id":"1703.02521","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-07T18:40:27Z","cross_cats_sorted":[],"title_canon_sha256":"56d29b7c8fe163778ca470f1de5cdc48bc3df8c962be1a88ee54901bdf0061e3","abstract_canon_sha256":"f726690f3b789725fb655a99b4a9ba0ab15db832ebaeb6ddc46350c6a4156e6e"},"schema_version":"1.0"},"canonical_sha256":"a45c597a0a998ec3fe5bdfc890e9d3c6ea83e20341898104a4fac1b45bd2a859","source":{"kind":"arxiv","id":"1703.02521","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.02521","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"arxiv_version","alias_value":"1703.02521v2","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.02521","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"pith_short_12","alias_value":"UROFS6QKTGHM","created_at":"2026-05-18T12:31:49Z"},{"alias_kind":"pith_short_16","alias_value":"UROFS6QKTGHMH7S3","created_at":"2026-05-18T12:31:49Z"},{"alias_kind":"pith_short_8","alias_value":"UROFS6QK","created_at":"2026-05-18T12:31:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:UROFS6QKTGHMH7S337EJB2OTY3","target":"record","payload":{"canonical_record":{"source":{"id":"1703.02521","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-07T18:40:27Z","cross_cats_sorted":[],"title_canon_sha256":"56d29b7c8fe163778ca470f1de5cdc48bc3df8c962be1a88ee54901bdf0061e3","abstract_canon_sha256":"f726690f3b789725fb655a99b4a9ba0ab15db832ebaeb6ddc46350c6a4156e6e"},"schema_version":"1.0"},"canonical_sha256":"a45c597a0a998ec3fe5bdfc890e9d3c6ea83e20341898104a4fac1b45bd2a859","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:44:08.844054Z","signature_b64":"tc5kGc71zqtRm+dz2xpER729nRMAZmgtBGjwcr/uo5w+3DE9JYgDI1t3K41QQYDpI4+yCq+ikK1CNCjIbnZ6CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a45c597a0a998ec3fe5bdfc890e9d3c6ea83e20341898104a4fac1b45bd2a859","last_reissued_at":"2026-05-18T00:44:08.843513Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:44:08.843513Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.02521","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MzKRzJZIKtcTbLSQBabp/v0K2ZBt96HkglR+0eYtED2xy/V6cDVWM+rItQR6CENn/xgQJHL3MKCJ08aWNKNaDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T23:12:30.403848Z"},"content_sha256":"558ac8ddfabe358e7ede797195654f74a5efc346bf5c66e480561075f532c2c5","schema_version":"1.0","event_id":"sha256:558ac8ddfabe358e7ede797195654f74a5efc346bf5c66e480561075f532c2c5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:UROFS6QKTGHMH7S337EJB2OTY3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Unsupervised Visual-Linguistic Reference Resolution in Instructional Videos","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"De-An Huang, Joseph J. Lim, Juan Carlos Niebles, Li Fei-Fei","submitted_at":"2017-03-07T18:40:27Z","abstract_excerpt":"We propose an unsupervised method for reference resolution in instructional videos, where the goal is to temporally link an entity (e.g., \"dressing\") to the action (e.g., \"mix yogurt\") that produced it. The key challenge is the inevitable visual-linguistic ambiguities arising from the changes in both visual appearance and referring expression of an entity in the video. This challenge is amplified by the fact that we aim to resolve references with no supervision. We address these challenges by learning a joint visual-linguistic model, where linguistic cues can help resolve visual ambiguities an"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.02521","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:44:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"prgUv43NLZqiXcKgEokbb/nREXq6nzUVI3m+r/90VzFDr2HXzXrs0GeKc8w57VePo+0P5iWoJbe70gJA0qx/CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T23:12:30.404398Z"},"content_sha256":"0687268a026856d5ae494dd9d3137e42b3c41972a162f3ed7f9d61191cf38d44","schema_version":"1.0","event_id":"sha256:0687268a026856d5ae494dd9d3137e42b3c41972a162f3ed7f9d61191cf38d44"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UROFS6QKTGHMH7S337EJB2OTY3/bundle.json","state_url":"https://pith.science/pith/UROFS6QKTGHMH7S337EJB2OTY3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UROFS6QKTGHMH7S337EJB2OTY3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-18T23:12:30Z","links":{"resolver":"https://pith.science/pith/UROFS6QKTGHMH7S337EJB2OTY3","bundle":"https://pith.science/pith/UROFS6QKTGHMH7S337EJB2OTY3/bundle.json","state":"https://pith.science/pith/UROFS6QKTGHMH7S337EJB2OTY3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UROFS6QKTGHMH7S337EJB2OTY3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:UROFS6QKTGHMH7S337EJB2OTY3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f726690f3b789725fb655a99b4a9ba0ab15db832ebaeb6ddc46350c6a4156e6e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-07T18:40:27Z","title_canon_sha256":"56d29b7c8fe163778ca470f1de5cdc48bc3df8c962be1a88ee54901bdf0061e3"},"schema_version":"1.0","source":{"id":"1703.02521","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.02521","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"arxiv_version","alias_value":"1703.02521v2","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.02521","created_at":"2026-05-18T00:44:08Z"},{"alias_kind":"pith_short_12","alias_value":"UROFS6QKTGHM","created_at":"2026-05-18T12:31:49Z"},{"alias_kind":"pith_short_16","alias_value":"UROFS6QKTGHMH7S3","created_at":"2026-05-18T12:31:49Z"},{"alias_kind":"pith_short_8","alias_value":"UROFS6QK","created_at":"2026-05-18T12:31:49Z"}],"graph_snapshots":[{"event_id":"sha256:0687268a026856d5ae494dd9d3137e42b3c41972a162f3ed7f9d61191cf38d44","target":"graph","created_at":"2026-05-18T00:44:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We propose an unsupervised method for reference resolution in instructional videos, where the goal is to temporally link an entity (e.g., \"dressing\") to the action (e.g., \"mix yogurt\") that produced it. The key challenge is the inevitable visual-linguistic ambiguities arising from the changes in both visual appearance and referring expression of an entity in the video. This challenge is amplified by the fact that we aim to resolve references with no supervision. We address these challenges by learning a joint visual-linguistic model, where linguistic cues can help resolve visual ambiguities an","authors_text":"De-An Huang, Joseph J. Lim, Juan Carlos Niebles, Li Fei-Fei","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-07T18:40:27Z","title":"Unsupervised Visual-Linguistic Reference Resolution in Instructional Videos"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.02521","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:558ac8ddfabe358e7ede797195654f74a5efc346bf5c66e480561075f532c2c5","target":"record","created_at":"2026-05-18T00:44:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f726690f3b789725fb655a99b4a9ba0ab15db832ebaeb6ddc46350c6a4156e6e","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-07T18:40:27Z","title_canon_sha256":"56d29b7c8fe163778ca470f1de5cdc48bc3df8c962be1a88ee54901bdf0061e3"},"schema_version":"1.0","source":{"id":"1703.02521","kind":"arxiv","version":2}},"canonical_sha256":"a45c597a0a998ec3fe5bdfc890e9d3c6ea83e20341898104a4fac1b45bd2a859","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a45c597a0a998ec3fe5bdfc890e9d3c6ea83e20341898104a4fac1b45bd2a859","first_computed_at":"2026-05-18T00:44:08.843513Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:44:08.843513Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tc5kGc71zqtRm+dz2xpER729nRMAZmgtBGjwcr/uo5w+3DE9JYgDI1t3K41QQYDpI4+yCq+ikK1CNCjIbnZ6CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:44:08.844054Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.02521","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:558ac8ddfabe358e7ede797195654f74a5efc346bf5c66e480561075f532c2c5","sha256:0687268a026856d5ae494dd9d3137e42b3c41972a162f3ed7f9d61191cf38d44"],"state_sha256":"7ff7a6c0b35e34ecf4573468876758f28959ab68ac133bc91b925a94c539601d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6uM0SIga3VwaTSjAE/vJ0+a+Uo6H9cVaazEOfYX6Wx5ymVM9Mj/oUr80nJOpQB5jcZlRNCG8EzH/3JAMOPPDDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-18T23:12:30.406131Z","bundle_sha256":"44e26e1790304003682dea613eed467f5e7718fd448123061e7c07ad46355289"}}