{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:RDVZS6LKC6LBTOS32WHXPHRXWE","short_pith_number":"pith:RDVZS6LK","canonical_record":{"source":{"id":"1703.08136","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T16:46:00Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"0eb2f936a875386c44cf6cdd6fd213b339ed15bcde831d5485d083b90eeb2918","abstract_canon_sha256":"72a561dbed7242c0775dbf94fdb4979e7ee441fdb6bd185e6af3c79ba3388e06"},"schema_version":"1.0"},"canonical_sha256":"88eb99796a179619ba5bd58f779e37b10d6b3804048ce6d32ce83958e16bac74","source":{"kind":"arxiv","id":"1703.08136","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08136","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08136v2","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08136","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"pith_short_12","alias_value":"RDVZS6LKC6LB","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"RDVZS6LKC6LBTOS3","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"RDVZS6LK","created_at":"2026-05-18T12:31:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:RDVZS6LKC6LBTOS32WHXPHRXWE","target":"record","payload":{"canonical_record":{"source":{"id":"1703.08136","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T16:46:00Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"0eb2f936a875386c44cf6cdd6fd213b339ed15bcde831d5485d083b90eeb2918","abstract_canon_sha256":"72a561dbed7242c0775dbf94fdb4979e7ee441fdb6bd185e6af3c79ba3388e06"},"schema_version":"1.0"},"canonical_sha256":"88eb99796a179619ba5bd58f779e37b10d6b3804048ce6d32ce83958e16bac74","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:43:39.243008Z","signature_b64":"5fBzTaMAcZOusvdXBUEt+DQ5Rr6qm/cwhhmsbnVAVmkLlh5F7DykctR0ENfja0QYouD24X9VlD0uAt1u+Wt4Dg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"88eb99796a179619ba5bd58f779e37b10d6b3804048ce6d32ce83958e16bac74","last_reissued_at":"2026-05-18T00:43:39.242523Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:43:39.242523Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.08136","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pkhKNzIcdZHbfbg1AIxnIhqlLx39UkeSVxMpW/C+AY79R3X5A0rV8Ucl4Vb8fB04GJx3dA4KcFp2NWf7bIYMBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T22:38:56.541450Z"},"content_sha256":"5ff6f20cc7a7d175f81f3b909d5c076848f7ace6919ea6b0f7cfa097ed95a9c2","schema_version":"1.0","event_id":"sha256:5ff6f20cc7a7d175f81f3b909d5c076848f7ace6919ea6b0f7cfa097ed95a9c2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:RDVZS6LKC6LBTOS32WHXPHRXWE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visually grounded learning of keyword prediction from untranscribed speech","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Gregory Shakhnarovich, Herman Kamper, Karen Livescu, Shane Settle","submitted_at":"2017-03-23T16:46:00Z","abstract_excerpt":"During language acquisition, infants have the benefit of visual cues to ground spoken language. Robots similarly have access to audio and visual sensors. Recent work has shown that images and spoken captions can be mapped into a meaningful common space, allowing images to be retrieved using speech and vice versa. In this setting of images paired with untranscribed spoken captions, we consider whether computer vision systems can be used to obtain textual labels for the speech. Concretely, we use an image-to-words multi-label visual classifier to tag images with soft textual labels, and then tra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08136","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:43:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HkmYxnn5weLrxRtKvCxPnbwd9PBRH5/J0EY1K6TihaG0M6CK4MykccTORdwNd7I64XlDlJou0FpCRW1+TdmGBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T22:38:56.541792Z"},"content_sha256":"492965e3db54fbca973e5d16c9f2fd8001dabe3c84be4c1bf3d4e255410bdffe","schema_version":"1.0","event_id":"sha256:492965e3db54fbca973e5d16c9f2fd8001dabe3c84be4c1bf3d4e255410bdffe"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/bundle.json","state_url":"https://pith.science/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T22:38:56Z","links":{"resolver":"https://pith.science/pith/RDVZS6LKC6LBTOS32WHXPHRXWE","bundle":"https://pith.science/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/bundle.json","state":"https://pith.science/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/RDVZS6LKC6LBTOS32WHXPHRXWE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:RDVZS6LKC6LBTOS32WHXPHRXWE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"72a561dbed7242c0775dbf94fdb4979e7ee441fdb6bd185e6af3c79ba3388e06","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T16:46:00Z","title_canon_sha256":"0eb2f936a875386c44cf6cdd6fd213b339ed15bcde831d5485d083b90eeb2918"},"schema_version":"1.0","source":{"id":"1703.08136","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08136","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08136v2","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08136","created_at":"2026-05-18T00:43:39Z"},{"alias_kind":"pith_short_12","alias_value":"RDVZS6LKC6LB","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_16","alias_value":"RDVZS6LKC6LBTOS3","created_at":"2026-05-18T12:31:39Z"},{"alias_kind":"pith_short_8","alias_value":"RDVZS6LK","created_at":"2026-05-18T12:31:39Z"}],"graph_snapshots":[{"event_id":"sha256:492965e3db54fbca973e5d16c9f2fd8001dabe3c84be4c1bf3d4e255410bdffe","target":"graph","created_at":"2026-05-18T00:43:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"During language acquisition, infants have the benefit of visual cues to ground spoken language. Robots similarly have access to audio and visual sensors. Recent work has shown that images and spoken captions can be mapped into a meaningful common space, allowing images to be retrieved using speech and vice versa. In this setting of images paired with untranscribed spoken captions, we consider whether computer vision systems can be used to obtain textual labels for the speech. Concretely, we use an image-to-words multi-label visual classifier to tag images with soft textual labels, and then tra","authors_text":"Gregory Shakhnarovich, Herman Kamper, Karen Livescu, Shane Settle","cross_cats":["cs.CV"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T16:46:00Z","title":"Visually grounded learning of keyword prediction from untranscribed speech"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08136","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ff6f20cc7a7d175f81f3b909d5c076848f7ace6919ea6b0f7cfa097ed95a9c2","target":"record","created_at":"2026-05-18T00:43:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"72a561dbed7242c0775dbf94fdb4979e7ee441fdb6bd185e6af3c79ba3388e06","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-03-23T16:46:00Z","title_canon_sha256":"0eb2f936a875386c44cf6cdd6fd213b339ed15bcde831d5485d083b90eeb2918"},"schema_version":"1.0","source":{"id":"1703.08136","kind":"arxiv","version":2}},"canonical_sha256":"88eb99796a179619ba5bd58f779e37b10d6b3804048ce6d32ce83958e16bac74","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"88eb99796a179619ba5bd58f779e37b10d6b3804048ce6d32ce83958e16bac74","first_computed_at":"2026-05-18T00:43:39.242523Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:43:39.242523Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5fBzTaMAcZOusvdXBUEt+DQ5Rr6qm/cwhhmsbnVAVmkLlh5F7DykctR0ENfja0QYouD24X9VlD0uAt1u+Wt4Dg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:43:39.243008Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.08136","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ff6f20cc7a7d175f81f3b909d5c076848f7ace6919ea6b0f7cfa097ed95a9c2","sha256:492965e3db54fbca973e5d16c9f2fd8001dabe3c84be4c1bf3d4e255410bdffe"],"state_sha256":"960450fa9e67a868c032504c1183f14e8cb2f89cbb454d8ef377d59a73228c7d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UTSKJaLmiptX/xPb8ZUxpWhCIBcB0Gzh7VyRZYPMu3inFC24QtNZ1UP90oMTXP5I4M2DJ6ZKIxwuQc2n5zX+Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T22:38:56.543685Z","bundle_sha256":"0c1e0350ebc84dd797b0fcb231eb5b92077c914adbdb5d848dc56f39cedf1b76"}}