{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:WD3VKCY6G4T2FYYWWNOS3ZLM6P","short_pith_number":"pith:WD3VKCY6","schema_version":"1.0","canonical_sha256":"b0f7550b1e3727a2e316b35d2de56cf3de307de35cebdef376d947c31f5388ac","source":{"kind":"arxiv","id":"1901.04889","version":1},"attestation_state":"computed","paper":{"title":"Deep Fusion: An Attention Guided Factorized Bilinear Pooling for Audio-video Emotion Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.HC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jun Du, Yuanyuan Zhang, Zi-Rui Wang","submitted_at":"2019-01-15T15:51:39Z","abstract_excerpt":"Automatic emotion recognition (AER) is a challenging task due to the abstract concept and multiple expressions of emotion. Although there is no consensus on a definition, human emotional states usually can be apperceived by auditory and visual systems. Inspired by this cognitive process in human beings, it's natural to simultaneously utilize audio and visual information in AER. However, most traditional fusion approaches only build a linear paradigm, such as feature concatenation and multi-system fusion, which hardly captures complex association between audio and video. In this paper, we intro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.04889","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-15T15:51:39Z","cross_cats_sorted":["cs.CV","cs.HC","stat.ML"],"title_canon_sha256":"8fdfbd56cbec35bc006d08ec970ae4931803fbe45fe26d141b9b9ff850e48a1d","abstract_canon_sha256":"0bbe53e7e998918c4a0313036970d12a2f4be79caa9a9fa472a3cdebc5baa300"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:56:21.402277Z","signature_b64":"0lsswnMLcIiHvfXw6tIGqMv90WS+5NFd25DZSWt91qReWDF5uItXJK4NDu4sxmP7tGGhaud0C/9lB2pmXpkHAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b0f7550b1e3727a2e316b35d2de56cf3de307de35cebdef376d947c31f5388ac","last_reissued_at":"2026-05-17T23:56:21.401719Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:56:21.401719Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep Fusion: An Attention Guided Factorized Bilinear Pooling for Audio-video Emotion Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.HC","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jun Du, Yuanyuan Zhang, Zi-Rui Wang","submitted_at":"2019-01-15T15:51:39Z","abstract_excerpt":"Automatic emotion recognition (AER) is a challenging task due to the abstract concept and multiple expressions of emotion. Although there is no consensus on a definition, human emotional states usually can be apperceived by auditory and visual systems. Inspired by this cognitive process in human beings, it's natural to simultaneously utilize audio and visual information in AER. However, most traditional fusion approaches only build a linear paradigm, such as feature concatenation and multi-system fusion, which hardly captures complex association between audio and video. In this paper, we intro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.04889","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.04889","created_at":"2026-05-17T23:56:21.401819+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.04889v1","created_at":"2026-05-17T23:56:21.401819+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.04889","created_at":"2026-05-17T23:56:21.401819+00:00"},{"alias_kind":"pith_short_12","alias_value":"WD3VKCY6G4T2","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"WD3VKCY6G4T2FYYW","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"WD3VKCY6","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P","json":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P.json","graph_json":"https://pith.science/api/pith-number/WD3VKCY6G4T2FYYWWNOS3ZLM6P/graph.json","events_json":"https://pith.science/api/pith-number/WD3VKCY6G4T2FYYWWNOS3ZLM6P/events.json","paper":"https://pith.science/paper/WD3VKCY6"},"agent_actions":{"view_html":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P","download_json":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P.json","view_paper":"https://pith.science/paper/WD3VKCY6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.04889&json=true","fetch_graph":"https://pith.science/api/pith-number/WD3VKCY6G4T2FYYWWNOS3ZLM6P/graph.json","fetch_events":"https://pith.science/api/pith-number/WD3VKCY6G4T2FYYWWNOS3ZLM6P/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P/action/storage_attestation","attest_author":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P/action/author_attestation","sign_citation":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P/action/citation_signature","submit_replication":"https://pith.science/pith/WD3VKCY6G4T2FYYWWNOS3ZLM6P/action/replication_record"}},"created_at":"2026-05-17T23:56:21.401819+00:00","updated_at":"2026-05-17T23:56:21.401819+00:00"}