{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ZHLDLYEXLPF3TL6NCKK74NFCRE","short_pith_number":"pith:ZHLDLYEX","canonical_record":{"source":{"id":"1712.00489","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-12-01T20:56:31Z","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","eess.AS"],"title_canon_sha256":"9016296210496eab92f935890dbd863408b2e2859bac9e87fb9df70042bb31f3","abstract_canon_sha256":"d52fb1694150d957bd7ee45d969100b1be3aacd3746e5bc931461b83e2f96079"},"schema_version":"1.0"},"canonical_sha256":"c9d635e0975bcbb9afcd1295fe34a2892debed75eb40accc7931443117127964","source":{"kind":"arxiv","id":"1712.00489","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.00489","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"arxiv_version","alias_value":"1712.00489v1","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.00489","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"pith_short_12","alias_value":"ZHLDLYEXLPF3","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZHLDLYEXLPF3TL6N","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZHLDLYEX","created_at":"2026-05-18T12:31:59Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ZHLDLYEXLPF3TL6NCKK74NFCRE","target":"record","payload":{"canonical_record":{"source":{"id":"1712.00489","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-12-01T20:56:31Z","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","eess.AS"],"title_canon_sha256":"9016296210496eab92f935890dbd863408b2e2859bac9e87fb9df70042bb31f3","abstract_canon_sha256":"d52fb1694150d957bd7ee45d969100b1be3aacd3746e5bc931461b83e2f96079"},"schema_version":"1.0"},"canonical_sha256":"c9d635e0975bcbb9afcd1295fe34a2892debed75eb40accc7931443117127964","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:34.578586Z","signature_b64":"M9VvNeRG1U91SI+tomQiD2NvP1LpEL+PiTtucba6NbMk8W3w0QzT7nfbCyUhtX8d/ZN/iyz5pdV18ZE061asDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c9d635e0975bcbb9afcd1295fe34a2892debed75eb40accc7931443117127964","last_reissued_at":"2026-05-18T00:28:34.577744Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:34.577744Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.00489","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aDcNRZrN3ULuUu6QJxi2coeJXN151nygncFnfs/m88m3byLXWAxZjqRGxi23H4ARQUMHHPCmBsOGItbnDbyEBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T04:58:40.047283Z"},"content_sha256":"7ce5e2904d5ae4507dd9c5a331602e2d5cc8ee7e44465887538d180f23d9000e","schema_version":"1.0","event_id":"sha256:7ce5e2904d5ae4507dd9c5a331602e2d5cc8ee7e44465887538d180f23d9000e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ZHLDLYEXLPF3TL6NCKK74NFCRE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Visual Features for Context-Aware Speech Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.LG","eess.AS"],"primary_cat":"cs.CL","authors_text":"Abhinav Gupta, Florian Metze, Leonardo Neves, Yajie Miao","submitted_at":"2017-12-01T20:56:31Z","abstract_excerpt":"Automatic transcriptions of consumer-generated multi-media content such as \"Youtube\" videos still exhibit high word error rates. Such data typically occupies a very broad domain, has been recorded in challenging conditions, with cheap hardware and a focus on the visual modality, and may have been post-processed or edited. In this paper, we extend our earlier work on adapting the acoustic model of a DNN-based speech recognition system to an RNN language model and show how both can be adapted to the objects and scenes that can be automatically detected in the video. We are working on a corpus of"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.00489","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KPQVaDFE/YeCBVplMq9KFa9NxkhIu+psVvVMndC6P/dYARtdAOjgootk7kxWOMeUAYE5e5838ISa7aD7yS15Bg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T04:58:40.047989Z"},"content_sha256":"47de05e33c58dd88c60dfcb8f3a2c492343420dd9a3d753d1603e710b5b9e3e2","schema_version":"1.0","event_id":"sha256:47de05e33c58dd88c60dfcb8f3a2c492343420dd9a3d753d1603e710b5b9e3e2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/bundle.json","state_url":"https://pith.science/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T04:58:40Z","links":{"resolver":"https://pith.science/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE","bundle":"https://pith.science/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/bundle.json","state":"https://pith.science/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZHLDLYEXLPF3TL6NCKK74NFCRE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ZHLDLYEXLPF3TL6NCKK74NFCRE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d52fb1694150d957bd7ee45d969100b1be3aacd3746e5bc931461b83e2f96079","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-12-01T20:56:31Z","title_canon_sha256":"9016296210496eab92f935890dbd863408b2e2859bac9e87fb9df70042bb31f3"},"schema_version":"1.0","source":{"id":"1712.00489","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.00489","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"arxiv_version","alias_value":"1712.00489v1","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.00489","created_at":"2026-05-18T00:28:34Z"},{"alias_kind":"pith_short_12","alias_value":"ZHLDLYEXLPF3","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_16","alias_value":"ZHLDLYEXLPF3TL6N","created_at":"2026-05-18T12:31:59Z"},{"alias_kind":"pith_short_8","alias_value":"ZHLDLYEX","created_at":"2026-05-18T12:31:59Z"}],"graph_snapshots":[{"event_id":"sha256:47de05e33c58dd88c60dfcb8f3a2c492343420dd9a3d753d1603e710b5b9e3e2","target":"graph","created_at":"2026-05-18T00:28:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Automatic transcriptions of consumer-generated multi-media content such as \"Youtube\" videos still exhibit high word error rates. Such data typically occupies a very broad domain, has been recorded in challenging conditions, with cheap hardware and a focus on the visual modality, and may have been post-processed or edited. In this paper, we extend our earlier work on adapting the acoustic model of a DNN-based speech recognition system to an RNN language model and show how both can be adapted to the objects and scenes that can be automatically detected in the video. We are working on a corpus of","authors_text":"Abhinav Gupta, Florian Metze, Leonardo Neves, Yajie Miao","cross_cats":["cs.AI","cs.CV","cs.LG","eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-12-01T20:56:31Z","title":"Visual Features for Context-Aware Speech Recognition"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.00489","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7ce5e2904d5ae4507dd9c5a331602e2d5cc8ee7e44465887538d180f23d9000e","target":"record","created_at":"2026-05-18T00:28:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d52fb1694150d957bd7ee45d969100b1be3aacd3746e5bc931461b83e2f96079","cross_cats_sorted":["cs.AI","cs.CV","cs.LG","eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-12-01T20:56:31Z","title_canon_sha256":"9016296210496eab92f935890dbd863408b2e2859bac9e87fb9df70042bb31f3"},"schema_version":"1.0","source":{"id":"1712.00489","kind":"arxiv","version":1}},"canonical_sha256":"c9d635e0975bcbb9afcd1295fe34a2892debed75eb40accc7931443117127964","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c9d635e0975bcbb9afcd1295fe34a2892debed75eb40accc7931443117127964","first_computed_at":"2026-05-18T00:28:34.577744Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:28:34.577744Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"M9VvNeRG1U91SI+tomQiD2NvP1LpEL+PiTtucba6NbMk8W3w0QzT7nfbCyUhtX8d/ZN/iyz5pdV18ZE061asDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:28:34.578586Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.00489","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7ce5e2904d5ae4507dd9c5a331602e2d5cc8ee7e44465887538d180f23d9000e","sha256:47de05e33c58dd88c60dfcb8f3a2c492343420dd9a3d753d1603e710b5b9e3e2"],"state_sha256":"0ee7bb5695134c8452e22a6d3a04ef084c55fa2710900799e5f4e0478d46a762"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GzbuYzHKj1EyYPEfHXHdbxyq4bt2X3ECiPPIy8J7MkQF8voXc10NdQwuw5HZew99/Gkc9rgFKLGdZcNw9vM5Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T04:58:40.051359Z","bundle_sha256":"e8354c4bfb85b4e63ded83a1b1aa54ec5f720c2387ef3c1da0ea9c548fb4d430"}}