windows: Properly handle surrogates (#34006)

张小白 created

Closes #33791

Surrogate pairs are now handled correctly, so input from tools like
`WinCompose` is properly received.

Release Notes:

- N/A

Change summary

crates/gpui/src/platform/windows/events.rs | 41 ++++++++++++++++++++---
crates/gpui/src/platform/windows/window.rs |  3 +
2 files changed, 38 insertions(+), 6 deletions(-)

Detailed changes

crates/gpui/src/platform/windows/events.rs 🔗

@@ -466,12 +466,7 @@ fn handle_keyup_msg(
 }
 
 fn handle_char_msg(wparam: WPARAM, state_ptr: Rc<WindowsWindowStatePtr>) -> Option<isize> {
-    let Some(input) = char::from_u32(wparam.0 as u32)
-        .filter(|c| !c.is_control())
-        .map(String::from)
-    else {
-        return Some(1);
-    };
+    let input = parse_char_message(wparam, &state_ptr)?;
     with_input_handler(&state_ptr, |input_handler| {
         input_handler.replace_text_in_range(None, &input);
     });
@@ -1228,6 +1223,36 @@ fn handle_input_language_changed(
     Some(0)
 }
 
+/// Decodes the UTF-16 code unit carried by a character message into text.
+///
+/// The low word of `wparam` is treated as a single UTF-16 code unit:
+///
+/// - A high surrogate is stored in `pending_surrogate` on the window state and
+///   yields no text yet.
+/// - A low surrogate is combined with the stored high surrogate into one
+///   character; without a stored high surrogate it is logged and dropped.
+/// - Any other code unit clears the stored surrogate and is returned on its own,
+///   unless it is a control character.
+///
+/// Returns `None` whenever there is no text to insert.
+#[inline]
+fn parse_char_message(wparam: WPARAM, state_ptr: &Rc<WindowsWindowStatePtr>) -> Option<String> {
+    let code_point = wparam.loword();
+    let mut lock = state_ptr.state.borrow_mut();
+    // https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G2630
+    match code_point {
+        0xD800..=0xDBFF => {
+            // High surrogate, wait for low surrogate
+            lock.pending_surrogate = Some(code_point);
+            None
+        }
+        0xDC00..=0xDFFF => {
+            if let Some(high_surrogate) = lock.pending_surrogate.take() {
+                // Low surrogate, combine with pending high surrogate
+                String::from_utf16(&[high_surrogate, code_point]).ok()
+            } else {
+                // Invalid low surrogate without a preceding high surrogate
+                log::warn!(
+                    "Received low surrogate without a preceding high surrogate: {code_point:x}"
+                );
+                None
+            }
+        }
+        _ => {
+            // A non-surrogate unit invalidates any half-finished pair.
+            lock.pending_surrogate = None;
+            // Keep rejecting control characters, as the previous
+            // `char::from_u32(..).filter(|c| !c.is_control())` code did, so they
+            // are never inserted as text.
+            char::from_u32(u32::from(code_point))
+                .filter(|c| !c.is_control())
+                .map(String::from)
+        }
+    }
+}
+
 #[inline]
 fn translate_message(handle: HWND, wparam: WPARAM, lparam: LPARAM) {
     let msg = MSG {
@@ -1270,6 +1295,10 @@ where
                 capslock: current_capslock(),
             }))
         }
+        VK_PACKET => {
+            translate_message(handle, wparam, lparam);
+            None
+        }
         VK_CAPITAL => {
             let capslock = current_capslock();
             if state

crates/gpui/src/platform/windows/window.rs 🔗

@@ -43,6 +43,7 @@ pub struct WindowsWindowState {
 
     pub callbacks: Callbacks,
     pub input_handler: Option<PlatformInputHandler>,
+    pub pending_surrogate: Option<u16>,
     pub last_reported_modifiers: Option<Modifiers>,
     pub last_reported_capslock: Option<Capslock>,
     pub system_key_handled: bool,
@@ -105,6 +106,7 @@ impl WindowsWindowState {
         let renderer = windows_renderer::init(gpu_context, hwnd, transparent)?;
         let callbacks = Callbacks::default();
         let input_handler = None;
+        let pending_surrogate = None;
         let last_reported_modifiers = None;
         let last_reported_capslock = None;
         let system_key_handled = false;
@@ -126,6 +128,7 @@ impl WindowsWindowState {
             min_size,
             callbacks,
             input_handler,
+            pending_surrogate,
             last_reported_modifiers,
             last_reported_capslock,
             system_key_handled,