{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:DJWX2G6J5OC2UL7T6L23U745RV","short_pith_number":"pith:DJWX2G6J","canonical_record":{"source":{"id":"1611.06986","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-11-21T20:08:51Z","cross_cats_sorted":["cs.LG","cs.SD"],"title_canon_sha256":"4d5669ae06b8f7e65876114649942aaf5922b11490e69da817a1ca2253a3f5a3","abstract_canon_sha256":"ee1d44ff1dd59da9289a29ac21f158b68c209f42009bad9186d61f2da768d137"},"schema_version":"1.0"},"canonical_sha256":"1a6d7d1bc9eb85aa2ff3f2f5ba7f9d8d71b8f0d03cf990a64667f92afa3d365a","source":{"kind":"arxiv","id":"1611.06986","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.06986","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"arxiv_version","alias_value":"1611.06986v1","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.06986","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"pith_short_12","alias_value":"DJWX2G6J5OC2","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_16","alias_value":"DJWX2G6J5OC2UL7T","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_8","alias_value":"DJWX2G6J","created_at":"2026-05-18T12:30:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:DJWX2G6J5OC2UL7T6L23U745RV","target":"record","payload":{"canonical_record":{"source":{"id":"1611.06986","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-11-21T20:08:51Z","cross_cats_sorted":["cs.LG","cs.SD"],"title_canon_sha256":"4d5669ae06b8f7e65876114649942aaf5922b11490e69da817a1ca2253a3f5a3","abstract_canon_sha256":"ee1d44ff1dd59da9289a29ac21f158b68c209f42009bad9186d61f2da768d137"},"schema_version":"1.0"},"canonical_sha256":"1a6d7d1bc9eb85aa2ff3f2f5ba7f9d8d71b8f0d03cf990a64667f92afa3d365a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:57:32.395433Z","signature_b64":"lZQ7g4KfAmbHmAqTPtSHrT146j1rTpjrloRG8AJ3sgv/V1mH03xQ4jp69UCA1fMNcEtyuT4dn1zgjTB1Lc+jDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1a6d7d1bc9eb85aa2ff3f2f5ba7f9d8d71b8f0d03cf990a64667f92afa3d365a","last_reissued_at":"2026-05-18T00:57:32.394920Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:57:32.394920Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1611.06986","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:57:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MJ3vEGPmGGvrKH8NejO91FpXlcL7eBtusoWcHxHy7sjY70ibvODMNoibEus5M8+rEPMzGwAbaFmLz/O5UMlTDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:16:06.769811Z"},"content_sha256":"98200b1def5edd82f2af19587cfe22b76a31a631433b2ef2a85b09b05a6a00ad","schema_version":"1.0","event_id":"sha256:98200b1def5edd82f2af19587cfe22b76a31a631433b2ef2a85b09b05a6a00ad"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:DJWX2G6J5OC2UL7T6L23U745RV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Robust end-to-end deep audiovisual speech recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.SD"],"primary_cat":"cs.CL","authors_text":"Fernando De la Torre, Florian Metze, Ramon Sanabria","submitted_at":"2016-11-21T20:08:51Z","abstract_excerpt":"Speech is one of the most effective ways of communication among humans. Even though audio is the most common way of transmitting speech, very important information can be found in other modalities, such as vision. Vision is particularly useful when the acoustic signal is corrupted. Multi-modal speech recognition however has not yet found wide-spread use, mostly because the temporal alignment and fusion of the different information sources is challenging.\n  This paper presents an end-to-end audiovisual speech recognizer (AVSR), based on recurrent neural networks (RNN) with a connectionist tempo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.06986","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:57:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RposrRRnfsX8+zfWwg2Dqzdfq9m/+Wm1UwEt1soFiaxGmonCnZL/jF/mZ5PEQ31Ur/fGUCoOSE3zik9YSkaNBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T11:16:06.770158Z"},"content_sha256":"8ca7766954963b0ce22860fc50e854a2ea0b4e93e1e580198fd56c6208769132","schema_version":"1.0","event_id":"sha256:8ca7766954963b0ce22860fc50e854a2ea0b4e93e1e580198fd56c6208769132"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DJWX2G6J5OC2UL7T6L23U745RV/bundle.json","state_url":"https://pith.science/pith/DJWX2G6J5OC2UL7T6L23U745RV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DJWX2G6J5OC2UL7T6L23U745RV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T11:16:06Z","links":{"resolver":"https://pith.science/pith/DJWX2G6J5OC2UL7T6L23U745RV","bundle":"https://pith.science/pith/DJWX2G6J5OC2UL7T6L23U745RV/bundle.json","state":"https://pith.science/pith/DJWX2G6J5OC2UL7T6L23U745RV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DJWX2G6J5OC2UL7T6L23U745RV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:DJWX2G6J5OC2UL7T6L23U745RV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ee1d44ff1dd59da9289a29ac21f158b68c209f42009bad9186d61f2da768d137","cross_cats_sorted":["cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-11-21T20:08:51Z","title_canon_sha256":"4d5669ae06b8f7e65876114649942aaf5922b11490e69da817a1ca2253a3f5a3"},"schema_version":"1.0","source":{"id":"1611.06986","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.06986","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"arxiv_version","alias_value":"1611.06986v1","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.06986","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"pith_short_12","alias_value":"DJWX2G6J5OC2","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_16","alias_value":"DJWX2G6J5OC2UL7T","created_at":"2026-05-18T12:30:12Z"},{"alias_kind":"pith_short_8","alias_value":"DJWX2G6J","created_at":"2026-05-18T12:30:12Z"}],"graph_snapshots":[{"event_id":"sha256:8ca7766954963b0ce22860fc50e854a2ea0b4e93e1e580198fd56c6208769132","target":"graph","created_at":"2026-05-18T00:57:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Speech is one of the most effective ways of communication among humans. Even though audio is the most common way of transmitting speech, very important information can be found in other modalities, such as vision. Vision is particularly useful when the acoustic signal is corrupted. Multi-modal speech recognition however has not yet found wide-spread use, mostly because the temporal alignment and fusion of the different information sources is challenging.\n  This paper presents an end-to-end audiovisual speech recognizer (AVSR), based on recurrent neural networks (RNN) with a connectionist tempo","authors_text":"Fernando De la Torre, Florian Metze, Ramon Sanabria","cross_cats":["cs.LG","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-11-21T20:08:51Z","title":"Robust end-to-end deep audiovisual speech recognition"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.06986","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:98200b1def5edd82f2af19587cfe22b76a31a631433b2ef2a85b09b05a6a00ad","target":"record","created_at":"2026-05-18T00:57:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ee1d44ff1dd59da9289a29ac21f158b68c209f42009bad9186d61f2da768d137","cross_cats_sorted":["cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-11-21T20:08:51Z","title_canon_sha256":"4d5669ae06b8f7e65876114649942aaf5922b11490e69da817a1ca2253a3f5a3"},"schema_version":"1.0","source":{"id":"1611.06986","kind":"arxiv","version":1}},"canonical_sha256":"1a6d7d1bc9eb85aa2ff3f2f5ba7f9d8d71b8f0d03cf990a64667f92afa3d365a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1a6d7d1bc9eb85aa2ff3f2f5ba7f9d8d71b8f0d03cf990a64667f92afa3d365a","first_computed_at":"2026-05-18T00:57:32.394920Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:57:32.394920Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"lZQ7g4KfAmbHmAqTPtSHrT146j1rTpjrloRG8AJ3sgv/V1mH03xQ4jp69UCA1fMNcEtyuT4dn1zgjTB1Lc+jDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:57:32.395433Z","signed_message":"canonical_sha256_bytes"},"source_id":"1611.06986","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:98200b1def5edd82f2af19587cfe22b76a31a631433b2ef2a85b09b05a6a00ad","sha256:8ca7766954963b0ce22860fc50e854a2ea0b4e93e1e580198fd56c6208769132"],"state_sha256":"1a7b1a2d764514bc9c672b63bd1b68e8a241cbc22ce682f0c84c844adaf0146d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OURahAiwn2Sr14B1EIWyoFw7WMz0+YtYG5RHRpWELRUapZ5uCfCYylMVjHHJFO11OKeiyDgmTrMpyWMeEL5mAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T11:16:06.772105Z","bundle_sha256":"eb5a4eebc74ecf0636c678026270528020197acd0763e98677c0385bd26b8880"}}