Reland invisibles (#19846)

Conrad Irwin created

Release Notes:

- Show invisibles in the editor

Relands #19298 

Trying to quantify a performance impact, it doesn't seem to impact much
visible in Instruments or in a micro-benchmark of Editor#layout_lines.
We're still taking a few hundred micro-seconds (+/- a lot) every time.
The ascii file has just ascii, where as the cc file has one control
character per line.

<img width="1055" alt="Screenshot 2024-10-28 at 12 14 53"
src="https://github.com/user-attachments/assets/1c382063-bb19-4e92-bbba-ed5e7c02309f">
<img width="1020" alt="Screenshot 2024-10-28 at 12 15 07"
src="https://github.com/user-attachments/assets/1789f65e-5f83-4c32-be47-7748c62c3703">

Change summary

crates/editor/src/display_map.rs            | 100 +++++++++++++++++
crates/editor/src/display_map/invisibles.rs | 129 +++++++++++++++++++++++
crates/editor/src/hover_popover.rs          |  41 ++++++
crates/gpui/src/text_system/line.rs         |  65 +++++++++-
4 files changed, 319 insertions(+), 16 deletions(-)

Detailed changes

crates/editor/src/display_map.rs 🔗

@@ -21,6 +21,7 @@ mod block_map;
 mod crease_map;
 mod fold_map;
 mod inlay_map;
+pub(crate) mod invisibles;
 mod tab_map;
 mod wrap_map;
 
@@ -42,6 +43,7 @@ use gpui::{
 pub(crate) use inlay_map::Inlay;
 use inlay_map::{InlayMap, InlaySnapshot};
 pub use inlay_map::{InlayOffset, InlayPoint};
+use invisibles::{is_invisible, replacement};
 use language::{
     language_settings::language_settings, ChunkRenderer, OffsetUtf16, Point,
     Subscription as BufferSubscription,
@@ -56,6 +58,7 @@ use std::{
     any::TypeId,
     borrow::Cow,
     fmt::Debug,
+    iter,
     num::NonZeroU32,
     ops::{Add, Range, Sub},
     sync::Arc,
@@ -63,7 +66,7 @@ use std::{
 use sum_tree::{Bias, TreeMap};
 use tab_map::{TabMap, TabSnapshot};
 use text::LineIndent;
-use ui::WindowContext;
+use ui::{div, px, IntoElement, ParentElement, Styled, WindowContext};
 use wrap_map::{WrapMap, WrapSnapshot};
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -461,6 +464,98 @@ pub struct HighlightedChunk<'a> {
     pub renderer: Option<ChunkRenderer>,
 }
 
+impl<'a> HighlightedChunk<'a> {
+    fn highlight_invisibles(
+        self,
+        editor_style: &'a EditorStyle,
+    ) -> impl Iterator<Item = Self> + 'a {
+        let mut chars = self.text.chars().peekable();
+        let mut text = self.text;
+        let style = self.style;
+        let is_tab = self.is_tab;
+        let renderer = self.renderer;
+        iter::from_fn(move || {
+            let mut prefix_len = 0;
+            while let Some(&ch) = chars.peek() {
+                if !is_invisible(ch) {
+                    prefix_len += ch.len_utf8();
+                    chars.next();
+                    continue;
+                }
+                if prefix_len > 0 {
+                    let (prefix, suffix) = text.split_at(prefix_len);
+                    text = suffix;
+                    return Some(HighlightedChunk {
+                        text: prefix,
+                        style,
+                        is_tab,
+                        renderer: renderer.clone(),
+                    });
+                }
+                chars.next();
+                let (prefix, suffix) = text.split_at(ch.len_utf8());
+                text = suffix;
+                if let Some(replacement) = replacement(ch) {
+                    let background = editor_style.status.hint_background;
+                    let underline = editor_style.status.hint;
+                    return Some(HighlightedChunk {
+                        text: prefix,
+                        style: None,
+                        is_tab: false,
+                        renderer: Some(ChunkRenderer {
+                            render: Arc::new(move |_| {
+                                div()
+                                    .child(replacement)
+                                    .bg(background)
+                                    .text_decoration_1()
+                                    .text_decoration_color(underline)
+                                    .into_any_element()
+                            }),
+                            constrain_width: false,
+                        }),
+                    });
+                } else {
+                    let invisible_highlight = HighlightStyle {
+                        background_color: Some(editor_style.status.hint_background),
+                        underline: Some(UnderlineStyle {
+                            color: Some(editor_style.status.hint),
+                            thickness: px(1.),
+                            wavy: false,
+                        }),
+                        ..Default::default()
+                    };
+                    let invisible_style = if let Some(mut style) = style {
+                        style.highlight(invisible_highlight);
+                        style
+                    } else {
+                        invisible_highlight
+                    };
+
+                    return Some(HighlightedChunk {
+                        text: prefix,
+                        style: Some(invisible_style),
+                        is_tab: false,
+                        renderer: renderer.clone(),
+                    });
+                }
+            }
+
+            if !text.is_empty() {
+                let remainder = text;
+                text = "";
+                Some(HighlightedChunk {
+                    text: remainder,
+                    style,
+                    is_tab,
+                    renderer: renderer.clone(),
+                })
+            } else {
+                None
+            }
+        })
+    }
+}
+
 #[derive(Clone)]
 pub struct DisplaySnapshot {
     pub buffer_snapshot: MultiBufferSnapshot,
@@ -675,7 +770,7 @@ impl DisplaySnapshot {
                 suggestion: Some(editor_style.suggestions_style),
             },
         )
-        .map(|chunk| {
+        .flat_map(|chunk| {
             let mut highlight_style = chunk
                 .syntax_highlight_id
                 .and_then(|id| id.style(&editor_style.syntax));
@@ -718,6 +813,7 @@ impl DisplaySnapshot {
                 is_tab: chunk.is_tab,
                 renderer: chunk.renderer,
             }
+            .highlight_invisibles(editor_style)
         })
     }
 

crates/editor/src/display_map/invisibles.rs 🔗

@@ -0,0 +1,129 @@
+// Invisibility in a Unicode context is not well defined, so we have to guess.
+//
+// We highlight all ASCII control codes, and unicode whitespace because they are likely
+// confused with an ASCII space in a programming context (U+0020).
+//
+// We also highlight the handful of blank non-space characters:
+//   U+2800 BRAILLE PATTERN BLANK - Category: So
+//   U+115F HANGUL CHOSEONG FILLER - Category: Lo
+//   U+1160 HANGUL CHOSEONG FILLER - Category: Lo
+//   U+3164 HANGUL FILLER - Category: Lo
+//   U+FFA0 HALFWIDTH HANGUL FILLER - Category: Lo
+//   U+FFFC OBJECT REPLACEMENT CHARACTER - Category: So
+//
+// For the rest of Unicode, invisibility happens for two reasons:
+// * A Format character (like a byte order mark or right-to-left override)
+// * An invisible Nonspacing Mark character (like U+034F, or variation selectors)
+//
+// We don't consider unassigned codepoints invisible as the font renderer already shows
+// a replacement character in that case (and there are a *lot* of them)
+//
+// Control characters are mostly fine to highlight; except:
+// * U+E0020..=U+E007F are used in emoji flags. We don't highlight them right now, but we could if we tightened our heuristics.
+// * U+200D is used to join characters. We highlight this but don't replace it. As our font system ignores mid-glyph highlights this mostly works to highlight unexpected uses.
+//
+// Nonspacing marks are handled like U+200D. This means that mid-glyph we ignore them, but
+// probably causes issues with end-of-glyph usage.
+//
+// ref: https://invisible-characters.com
+// ref: https://www.compart.com/en/unicode/category/Cf
+// ref: https://gist.github.com/ConradIrwin/f759e1fc29267143c4c7895aa495dca5?h=1
+// ref: https://unicode.org/Public/emoji/13.0/emoji-test.txt
+// https://github.com/bits/UTF-8-Unicode-Test-Documents/blob/master/UTF-8_sequence_separated/utf8_sequence_0-0x10ffff_assigned_including-unprintable-asis.txt
+pub fn is_invisible(c: char) -> bool {
+    if c <= '\u{1f}' {
+        c != '\t' && c != '\n' && c != '\r'
+    } else if c >= '\u{7f}' {
+        c <= '\u{9f}'
+            || (c.is_whitespace() && c != IDEOGRAPHIC_SPACE)
+            || contains(c, &FORMAT)
+            || contains(c, &OTHER)
+    } else {
+        false
+    }
+}
+// ASCII control characters have fancy unicode glyphs, everything else
+// is replaced by a space - unless it is used in combining characters in
+// which case we need to leave it in the string.
+pub(crate) fn replacement(c: char) -> Option<&'static str> {
+    if c <= '\x1f' {
+        Some(C0_SYMBOLS[c as usize])
+    } else if c == '\x7f' {
+        Some(DEL)
+    } else if contains(c, &PRESERVE) {
+        None
+    } else {
+        Some("\u{2007}") // fixed width space
+    }
+}
+// IDEOGRAPHIC SPACE is common alongside Chinese and other wide character sets.
+// We don't highlight this for now (as it already shows up wide in the editor),
+// but could if we tracked state in the classifier.
+const IDEOGRAPHIC_SPACE: char = '\u{3000}';
+
+const C0_SYMBOLS: &'static [&'static str] = &[
+    "␀", "␁", "␂", "␃", "␄", "␅", "␆", "␇", "␈", "␉", "␊", "␋", "␌", "␍", "␎", "␏", "␐", "␑", "␒",
+    "␓", "␔", "␕", "␖", "␗", "␘", "␙", "␚", "␛", "␜", "␝", "␞", "␟",
+];
+const DEL: &'static str = "␡";
+
+// generated using ucd-generate: ucd-generate general-category --include Format --chars ucd-16.0.0
+pub const FORMAT: &'static [(char, char)] = &[
+    ('\u{ad}', '\u{ad}'),
+    ('\u{600}', '\u{605}'),
+    ('\u{61c}', '\u{61c}'),
+    ('\u{6dd}', '\u{6dd}'),
+    ('\u{70f}', '\u{70f}'),
+    ('\u{890}', '\u{891}'),
+    ('\u{8e2}', '\u{8e2}'),
+    ('\u{180e}', '\u{180e}'),
+    ('\u{200b}', '\u{200f}'),
+    ('\u{202a}', '\u{202e}'),
+    ('\u{2060}', '\u{2064}'),
+    ('\u{2066}', '\u{206f}'),
+    ('\u{feff}', '\u{feff}'),
+    ('\u{fff9}', '\u{fffb}'),
+    ('\u{110bd}', '\u{110bd}'),
+    ('\u{110cd}', '\u{110cd}'),
+    ('\u{13430}', '\u{1343f}'),
+    ('\u{1bca0}', '\u{1bca3}'),
+    ('\u{1d173}', '\u{1d17a}'),
+    ('\u{e0001}', '\u{e0001}'),
+    ('\u{e0020}', '\u{e007f}'),
+];
+
+// hand-made base on https://invisible-characters.com (Excluding Cf)
+pub const OTHER: &'static [(char, char)] = &[
+    ('\u{034f}', '\u{034f}'),
+    ('\u{115F}', '\u{1160}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{2800}', '\u{2800}'),
+    ('\u{3164}', '\u{3164}'),
+    ('\u{fe00}', '\u{fe0d}'),
+    ('\u{ffa0}', '\u{ffa0}'),
+    ('\u{fffc}', '\u{fffc}'),
+    ('\u{e0100}', '\u{e01ef}'),
+];
+
+// a subset of FORMAT/OTHER that may appear within glyphs
+const PRESERVE: &'static [(char, char)] = &[
+    ('\u{034f}', '\u{034f}'),
+    ('\u{200d}', '\u{200d}'),
+    ('\u{17b4}', '\u{17b5}'),
+    ('\u{180b}', '\u{180d}'),
+    ('\u{e0061}', '\u{e007a}'),
+    ('\u{e007f}', '\u{e007f}'),
+];
+
+fn contains(c: char, list: &[(char, char)]) -> bool {
+    for (start, end) in list {
+        if c < *start {
+            return false;
+        }
+        if c <= *end {
+            return true;
+        }
+    }
+    false
+}

crates/editor/src/hover_popover.rs 🔗

@@ -1,5 +1,5 @@
 use crate::{
-    display_map::{InlayOffset, ToDisplayPoint},
+    display_map::{invisibles::is_invisible, InlayOffset, ToDisplayPoint},
     hover_links::{InlayHighlight, RangeInEditor},
     scroll::ScrollAmount,
     Anchor, AnchorRangeExt, DisplayPoint, DisplayRow, Editor, EditorSettings, EditorSnapshot,
@@ -11,7 +11,7 @@ use gpui::{
     StyleRefinement, Styled, Task, TextStyleRefinement, View, ViewContext,
 };
 use itertools::Itertools;
-use language::{DiagnosticEntry, Language, LanguageRegistry};
+use language::{Diagnostic, DiagnosticEntry, Language, LanguageRegistry};
 use lsp::DiagnosticSeverity;
 use markdown::{Markdown, MarkdownStyle};
 use multi_buffer::ToOffset;
@@ -259,7 +259,7 @@ fn show_hover(
             }
 
             // If there's a diagnostic, assign it on the hover state and notify
-            let local_diagnostic = snapshot
+            let mut local_diagnostic = snapshot
                 .buffer_snapshot
                 .diagnostics_in_range::<_, usize>(anchor..anchor, false)
                 // Find the entry with the most specific range
@@ -280,6 +280,41 @@ fn show_hover(
                         range: entry.range.to_anchors(&snapshot.buffer_snapshot),
                     })
             });
+            if let Some(invisible) = snapshot
+                .buffer_snapshot
+                .chars_at(anchor)
+                .next()
+                .filter(|&c| is_invisible(c))
+            {
+                let after = snapshot.buffer_snapshot.anchor_after(
+                    anchor.to_offset(&snapshot.buffer_snapshot) + invisible.len_utf8(),
+                );
+                local_diagnostic = Some(DiagnosticEntry {
+                    diagnostic: Diagnostic {
+                        severity: DiagnosticSeverity::HINT,
+                        message: format!("Unicode character U+{:02X}", invisible as u32),
+                        ..Default::default()
+                    },
+                    range: anchor..after,
+                })
+            } else if let Some(invisible) = snapshot
+                .buffer_snapshot
+                .reversed_chars_at(anchor)
+                .next()
+                .filter(|&c| is_invisible(c))
+            {
+                let before = snapshot.buffer_snapshot.anchor_before(
+                    anchor.to_offset(&snapshot.buffer_snapshot) - invisible.len_utf8(),
+                );
+                local_diagnostic = Some(DiagnosticEntry {
+                    diagnostic: Diagnostic {
+                        severity: DiagnosticSeverity::HINT,
+                        message: format!("Unicode character U+{:02X}", invisible as u32),
+                        ..Default::default()
+                    },
+                    range: before..anchor,
+                })
+            }
 
             let diagnostic_popover = if let Some(local_diagnostic) = local_diagnostic {
                 let text = match local_diagnostic.diagnostic.source {

crates/gpui/src/text_system/line.rs 🔗

@@ -1,6 +1,7 @@
 use crate::{
-    black, fill, point, px, size, Bounds, Hsla, LineLayout, Pixels, Point, Result, SharedString,
-    StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary, WrappedLineLayout,
+    black, fill, point, px, size, Bounds, Half, Hsla, LineLayout, Pixels, Point, Result,
+    SharedString, StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary,
+    WrappedLineLayout,
 };
 use derive_more::{Deref, DerefMut};
 use smallvec::SmallVec;
@@ -129,8 +130,9 @@ fn paint_line(
         let text_system = cx.text_system().clone();
         let mut glyph_origin = origin;
         let mut prev_glyph_position = Point::default();
+        let mut max_glyph_size = size(px(0.), px(0.));
         for (run_ix, run) in layout.runs.iter().enumerate() {
-            let max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size;
+            max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size;
 
             for (glyph_ix, glyph) in run.glyphs.iter().enumerate() {
                 glyph_origin.x += glyph.position.x - prev_glyph_position.x;
@@ -139,6 +141,9 @@ fn paint_line(
                     wraps.next();
                     if let Some((background_origin, background_color)) = current_background.as_mut()
                     {
+                        if glyph_origin.x == background_origin.x {
+                            background_origin.x -= max_glyph_size.width.half()
+                        }
                         cx.paint_quad(fill(
                             Bounds {
                                 origin: *background_origin,
@@ -150,6 +155,9 @@ fn paint_line(
                         background_origin.y += line_height;
                     }
                     if let Some((underline_origin, underline_style)) = current_underline.as_mut() {
+                        if glyph_origin.x == underline_origin.x {
+                            underline_origin.x -= max_glyph_size.width.half();
+                        };
                         cx.paint_underline(
                             *underline_origin,
                             glyph_origin.x - underline_origin.x,
@@ -161,6 +169,9 @@ fn paint_line(
                     if let Some((strikethrough_origin, strikethrough_style)) =
                         current_strikethrough.as_mut()
                     {
+                        if glyph_origin.x == strikethrough_origin.x {
+                            strikethrough_origin.x -= max_glyph_size.width.half();
+                        };
                         cx.paint_strikethrough(
                             *strikethrough_origin,
                             glyph_origin.x - strikethrough_origin.x,
@@ -179,7 +190,18 @@ fn paint_line(
                 let mut finished_underline: Option<(Point<Pixels>, UnderlineStyle)> = None;
                 let mut finished_strikethrough: Option<(Point<Pixels>, StrikethroughStyle)> = None;
                 if glyph.index >= run_end {
-                    if let Some(style_run) = decoration_runs.next() {
+                    let mut style_run = decoration_runs.next();
+
+                    // ignore style runs that apply to a partial glyph
+                    while let Some(run) = style_run {
+                        if glyph.index < run_end + (run.len as usize) {
+                            break;
+                        }
+                        run_end += run.len as usize;
+                        style_run = decoration_runs.next();
+                    }
+
+                    if let Some(style_run) = style_run {
                         if let Some((_, background_color)) = &mut current_background {
                             if style_run.background_color.as_ref() != Some(background_color) {
                                 finished_background = current_background.take();
@@ -239,17 +261,24 @@ fn paint_line(
                     }
                 }
 
-                if let Some((background_origin, background_color)) = finished_background {
+                if let Some((mut background_origin, background_color)) = finished_background {
+                    let mut width = glyph_origin.x - background_origin.x;
+                    if background_origin.x == glyph_origin.x {
+                        background_origin.x -= max_glyph_size.width.half();
+                    };
                     cx.paint_quad(fill(
                         Bounds {
                             origin: background_origin,
-                            size: size(glyph_origin.x - background_origin.x, line_height),
+                            size: size(width, line_height),
                         },
                         background_color,
                     ));
                 }
 
-                if let Some((underline_origin, underline_style)) = finished_underline {
+                if let Some((mut underline_origin, underline_style)) = finished_underline {
+                    if underline_origin.x == glyph_origin.x {
+                        underline_origin.x -= max_glyph_size.width.half();
+                    };
                     cx.paint_underline(
                         underline_origin,
                         glyph_origin.x - underline_origin.x,
@@ -257,7 +286,12 @@ fn paint_line(
                     );
                 }
 
-                if let Some((strikethrough_origin, strikethrough_style)) = finished_strikethrough {
+                if let Some((mut strikethrough_origin, strikethrough_style)) =
+                    finished_strikethrough
+                {
+                    if strikethrough_origin.x == glyph_origin.x {
+                        strikethrough_origin.x -= max_glyph_size.width.half();
+                    };
                     cx.paint_strikethrough(
                         strikethrough_origin,
                         glyph_origin.x - strikethrough_origin.x,
@@ -299,7 +333,10 @@ fn paint_line(
             last_line_end_x -= glyph.position.x;
         }
 
-        if let Some((background_origin, background_color)) = current_background.take() {
+        if let Some((mut background_origin, background_color)) = current_background.take() {
+            if last_line_end_x == background_origin.x {
+                background_origin.x -= max_glyph_size.width.half()
+            };
             cx.paint_quad(fill(
                 Bounds {
                     origin: background_origin,
@@ -309,7 +346,10 @@ fn paint_line(
             ));
         }
 
-        if let Some((underline_start, underline_style)) = current_underline.take() {
+        if let Some((mut underline_start, underline_style)) = current_underline.take() {
+            if last_line_end_x == underline_start.x {
+                underline_start.x -= max_glyph_size.width.half()
+            };
             cx.paint_underline(
                 underline_start,
                 last_line_end_x - underline_start.x,
@@ -317,7 +357,10 @@ fn paint_line(
             );
         }
 
-        if let Some((strikethrough_start, strikethrough_style)) = current_strikethrough.take() {
+        if let Some((mut strikethrough_start, strikethrough_style)) = current_strikethrough.take() {
+            if last_line_end_x == strikethrough_start.x {
+                strikethrough_start.x -= max_glyph_size.width.half()
+            };
             cx.paint_strikethrough(
                 strikethrough_start,
                 last_line_end_x - strikethrough_start.x,