{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:R4Y3VJSPSUGJCEGU7RYYSFULMP","short_pith_number":"pith:R4Y3VJSP","schema_version":"1.0","canonical_sha256":"8f31baa64f950c9110d4fc7189168b63e66a86c17eabbf2113dad486770bb187","source":{"kind":"arxiv","id":"2603.17837","version":4},"attestation_state":"computed","paper":{"title":"The Silent Thought: Modeling Internal Cognition in Full-Duplex Spoken Dialogue Models via Latent Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Spoken dialogue models can perform continuous internal reasoning while listening by recursively updating latent embeddings.","cross_cats":["cs.CL"],"primary_cat":"eess.AS","authors_text":"Chen Chen, Donghang Wu, Eng Siong Chng, Hexin Liu, Tianyu Zhang, Yoshua Bengio, Yuxin Li","submitted_at":"2026-03-18T15:30:29Z","abstract_excerpt":"During conversational interactions, humans subconsciously engage in concurrent thinking while listening to a speaker. Although this internal cognitive processing may not always manifest as explicit linguistic structures, it is instrumental in formulating high-quality responses. Inspired by this cognitive phenomenon, we propose a novel Full-duplex LAtent and Internal Reasoning method named FLAIR that conducts latent thinking simultaneously with speech perception. Unlike conventional \"thinking\" mechanisms in NLP, which require post-hoc generation, our approach aligns seamlessly with spoken dialo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.17837","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"eess.AS","submitted_at":"2026-03-18T15:30:29Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"d6d9110a3802ae95800846fa36cda7b2145e2fe02437d86fca1a182077ac8d08","abstract_canon_sha256":"7ef5b6f0546ee634d454410ff9aa6a4ad13752e6f8c9bc401bdd187fa84d03dd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:17.921942Z","signature_b64":"lT3apLjfhhe/bT9fC1kM0Jxw61AEuBlNZP3cJ3QQYyDR4uTFGaxraWqAv/Yo6GyOM5c1wLEVQhgydNlhKHfuBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f31baa64f950c9110d4fc7189168b63e66a86c17eabbf2113dad486770bb187","last_reissued_at":"2026-05-21T01:05:17.921259Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:17.921259Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Silent Thought: Modeling Internal Cognition in Full-Duplex Spoken Dialogue Models via Latent Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Spoken dialogue models can perform continuous internal reasoning while listening by recursively updating latent embeddings.","cross_cats":["cs.CL"],"primary_cat":"eess.AS","authors_text":"Chen Chen, Donghang Wu, Eng Siong Chng, Hexin Liu, Tianyu Zhang, Yoshua Bengio, Yuxin Li","submitted_at":"2026-03-18T15:30:29Z","abstract_excerpt":"During conversational interactions, humans subconsciously engage in concurrent thinking while listening to a speaker. Although this internal cognitive processing may not always manifest as explicit linguistic structures, it is instrumental in formulating high-quality responses. Inspired by this cognitive phenomenon, we propose a novel Full-duplex LAtent and Internal Reasoning method named FLAIR that conducts latent thinking simultaneously with speech perception. Unlike conventional \"thinking\" mechanisms in NLP, which require post-hoc generation, our approach aligns seamlessly with spoken dialo"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"our approach aligns seamlessly with spoken dialogue systems: during the user's speaking phase, it recursively feeds the latent embedding output from the previous step into the next step, enabling continuous reasoning that strictly adheres to causality without introducing additional latency.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That recursive updates to latent embeddings can meaningfully represent and advance internal cognitive processing without explicit reasoning supervision or causing drift in the dialogue state.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"FLAIR enables spoken dialogue AI to conduct continuous latent reasoning while perceiving speech through recursive latent embeddings and an ELBO-based finetuning objective.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Spoken dialogue models can perform continuous internal reasoning while listening by recursively updating latent embeddings.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d6b64c2ab9f84aa93cbcc766a8eedaece3415adc6dba19a3572e5e3e68b3567f"},"source":{"id":"2603.17837","kind":"arxiv","version":4},"verdict":{"id":"3f47084e-135b-4395-9537-97cd2e0a2075","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T08:32:22.876995Z","strongest_claim":"our approach aligns seamlessly with spoken dialogue systems: during the user's speaking phase, it recursively feeds the latent embedding output from the previous step into the next step, enabling continuous reasoning that strictly adheres to causality without introducing additional latency.","one_line_summary":"FLAIR enables spoken dialogue AI to conduct continuous latent reasoning while perceiving speech through recursive latent embeddings and an ELBO-based finetuning objective.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That recursive updates to latent embeddings can meaningfully represent and advance internal cognitive processing without explicit reasoning supervision or causing drift in the dialogue state.","pith_extraction_headline":"Spoken dialogue models can perform continuous internal reasoning while listening by recursively updating latent embeddings."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.17837/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.17837","created_at":"2026-05-21T01:05:17.921372+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.17837v4","created_at":"2026-05-21T01:05:17.921372+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.17837","created_at":"2026-05-21T01:05:17.921372+00:00"},{"alias_kind":"pith_short_12","alias_value":"R4Y3VJSPSUGJ","created_at":"2026-05-21T01:05:17.921372+00:00"},{"alias_kind":"pith_short_16","alias_value":"R4Y3VJSPSUGJCEGU","created_at":"2026-05-21T01:05:17.921372+00:00"},{"alias_kind":"pith_short_8","alias_value":"R4Y3VJSP","created_at":"2026-05-21T01:05:17.921372+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.20266","citing_title":"A Survey of Large Audio Language Models: Generalization, Trustworthiness, and Outlook","ref_index":80,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21008","citing_title":"A Survey of Audio Reasoning in Multimodal Foundation Models","ref_index":82,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP","json":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP.json","graph_json":"https://pith.science/api/pith-number/R4Y3VJSPSUGJCEGU7RYYSFULMP/graph.json","events_json":"https://pith.science/api/pith-number/R4Y3VJSPSUGJCEGU7RYYSFULMP/events.json","paper":"https://pith.science/paper/R4Y3VJSP"},"agent_actions":{"view_html":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP","download_json":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP.json","view_paper":"https://pith.science/paper/R4Y3VJSP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.17837&json=true","fetch_graph":"https://pith.science/api/pith-number/R4Y3VJSPSUGJCEGU7RYYSFULMP/graph.json","fetch_events":"https://pith.science/api/pith-number/R4Y3VJSPSUGJCEGU7RYYSFULMP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP/action/storage_attestation","attest_author":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP/action/author_attestation","sign_citation":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP/action/citation_signature","submit_replication":"https://pith.science/pith/R4Y3VJSPSUGJCEGU7RYYSFULMP/action/replication_record"}},"created_at":"2026-05-21T01:05:17.921372+00:00","updated_at":"2026-05-21T01:05:17.921372+00:00"}