From 58e5d4ff0245368aa36b3274361d51a2a6dbe25b Mon Sep 17 00:00:00 2001 From: Conrad Irwin Date: Mon, 28 Oct 2024 20:27:09 -0600 Subject: [PATCH] Reland invisibles (#19846) Release Notes: - Show invisibles in the editor Relands #19298 Trying to quantify a performance impact, it doesn't seem to impact much visible in Instruments or in a micro-benchmark of Editor#layout_lines. We're still taking a few hundred micro-seconds (+/- a lot) every time. The ascii file has just ascii, where as the cc file has one control character per line. Screenshot 2024-10-28 at 12 14 53 Screenshot 2024-10-28 at 12 15 07 --- crates/editor/src/display_map.rs | 100 ++++++++++++++- crates/editor/src/display_map/invisibles.rs | 129 ++++++++++++++++++++ crates/editor/src/hover_popover.rs | 41 ++++++- crates/gpui/src/text_system/line.rs | 65 ++++++++-- 4 files changed, 319 insertions(+), 16 deletions(-) create mode 100644 crates/editor/src/display_map/invisibles.rs diff --git a/crates/editor/src/display_map.rs b/crates/editor/src/display_map.rs index 559c2321c6208963ec8f27dd1a68c160404698fd..79a2fbdb116be9b31e209ea3f71e514511529a77 100644 --- a/crates/editor/src/display_map.rs +++ b/crates/editor/src/display_map.rs @@ -21,6 +21,7 @@ mod block_map; mod crease_map; mod fold_map; mod inlay_map; +pub(crate) mod invisibles; mod tab_map; mod wrap_map; @@ -42,6 +43,7 @@ use gpui::{ pub(crate) use inlay_map::Inlay; use inlay_map::{InlayMap, InlaySnapshot}; pub use inlay_map::{InlayOffset, InlayPoint}; +use invisibles::{is_invisible, replacement}; use language::{ language_settings::language_settings, ChunkRenderer, OffsetUtf16, Point, Subscription as BufferSubscription, @@ -56,6 +58,7 @@ use std::{ any::TypeId, borrow::Cow, fmt::Debug, + iter, num::NonZeroU32, ops::{Add, Range, Sub}, sync::Arc, @@ -63,7 +66,7 @@ use std::{ use sum_tree::{Bias, TreeMap}; use tab_map::{TabMap, TabSnapshot}; use text::LineIndent; -use ui::WindowContext; +use ui::{div, px, IntoElement, ParentElement, Styled, WindowContext}; use wrap_map::{WrapMap, WrapSnapshot}; #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -461,6 +464,98 @@ pub struct HighlightedChunk<'a> { pub renderer: Option, } +impl<'a> HighlightedChunk<'a> { + fn highlight_invisibles( + self, + editor_style: &'a EditorStyle, + ) -> impl Iterator + 'a { + let mut chars = self.text.chars().peekable(); + let mut text = self.text; + let style = self.style; + let is_tab = self.is_tab; + let renderer = self.renderer; + iter::from_fn(move || { + let mut prefix_len = 0; + while let Some(&ch) = chars.peek() { + if !is_invisible(ch) { + prefix_len += ch.len_utf8(); + chars.next(); + continue; + } + if prefix_len > 0 { + let (prefix, suffix) = text.split_at(prefix_len); + text = suffix; + return Some(HighlightedChunk { + text: prefix, + style, + is_tab, + renderer: renderer.clone(), + }); + } + chars.next(); + let (prefix, suffix) = text.split_at(ch.len_utf8()); + text = suffix; + if let Some(replacement) = replacement(ch) { + let background = editor_style.status.hint_background; + let underline = editor_style.status.hint; + return Some(HighlightedChunk { + text: prefix, + style: None, + is_tab: false, + renderer: Some(ChunkRenderer { + render: Arc::new(move |_| { + div() + .child(replacement) + .bg(background) + .text_decoration_1() + .text_decoration_color(underline) + .into_any_element() + }), + constrain_width: false, + }), + }); + } else { + let invisible_highlight = HighlightStyle { + background_color: Some(editor_style.status.hint_background), + underline: Some(UnderlineStyle { + color: Some(editor_style.status.hint), + thickness: px(1.), + wavy: false, + }), + ..Default::default() + }; + let invisible_style = if let Some(mut style) = style { + style.highlight(invisible_highlight); + style + } else { + invisible_highlight + }; + + return Some(HighlightedChunk { + text: prefix, + style: Some(invisible_style), + is_tab: false, + renderer: renderer.clone(), + }); + } + } + + if !text.is_empty() { + let remainder = text; + text = ""; + Some(HighlightedChunk { + text: remainder, + style, + is_tab, + renderer: renderer.clone(), + }) + } else { + None + } + }) + } +} + #[derive(Clone)] pub struct DisplaySnapshot { pub buffer_snapshot: MultiBufferSnapshot, @@ -675,7 +770,7 @@ impl DisplaySnapshot { suggestion: Some(editor_style.suggestions_style), }, ) - .map(|chunk| { + .flat_map(|chunk| { let mut highlight_style = chunk .syntax_highlight_id .and_then(|id| id.style(&editor_style.syntax)); @@ -718,6 +813,7 @@ impl DisplaySnapshot { is_tab: chunk.is_tab, renderer: chunk.renderer, } + .highlight_invisibles(editor_style) }) } diff --git a/crates/editor/src/display_map/invisibles.rs b/crates/editor/src/display_map/invisibles.rs new file mode 100644 index 0000000000000000000000000000000000000000..794b897603bd66aea86c5733e2d6381cc71495c2 --- /dev/null +++ b/crates/editor/src/display_map/invisibles.rs @@ -0,0 +1,129 @@ +// Invisibility in a Unicode context is not well defined, so we have to guess. +// +// We highlight all ASCII control codes, and unicode whitespace because they are likely +// confused with an ASCII space in a programming context (U+0020). +// +// We also highlight the handful of blank non-space characters: +// U+2800 BRAILLE PATTERN BLANK - Category: So +// U+115F HANGUL CHOSEONG FILLER - Category: Lo +// U+1160 HANGUL CHOSEONG FILLER - Category: Lo +// U+3164 HANGUL FILLER - Category: Lo +// U+FFA0 HALFWIDTH HANGUL FILLER - Category: Lo +// U+FFFC OBJECT REPLACEMENT CHARACTER - Category: So +// +// For the rest of Unicode, invisibility happens for two reasons: +// * A Format character (like a byte order mark or right-to-left override) +// * An invisible Nonspacing Mark character (like U+034F, or variation selectors) +// +// We don't consider unassigned codepoints invisible as the font renderer already shows +// a replacement character in that case (and there are a *lot* of them) +// +// Control characters are mostly fine to highlight; except: +// * U+E0020..=U+E007F are used in emoji flags. We don't highlight them right now, but we could if we tightened our heuristics. +// * U+200D is used to join characters. We highlight this but don't replace it. As our font system ignores mid-glyph highlights this mostly works to highlight unexpected uses. +// +// Nonspacing marks are handled like U+200D. This means that mid-glyph we ignore them, but +// probably causes issues with end-of-glyph usage. +// +// ref: https://invisible-characters.com +// ref: https://www.compart.com/en/unicode/category/Cf +// ref: https://gist.github.com/ConradIrwin/f759e1fc29267143c4c7895aa495dca5?h=1 +// ref: https://unicode.org/Public/emoji/13.0/emoji-test.txt +// https://github.com/bits/UTF-8-Unicode-Test-Documents/blob/master/UTF-8_sequence_separated/utf8_sequence_0-0x10ffff_assigned_including-unprintable-asis.txt +pub fn is_invisible(c: char) -> bool { + if c <= '\u{1f}' { + c != '\t' && c != '\n' && c != '\r' + } else if c >= '\u{7f}' { + c <= '\u{9f}' + || (c.is_whitespace() && c != IDEOGRAPHIC_SPACE) + || contains(c, &FORMAT) + || contains(c, &OTHER) + } else { + false + } +} +// ASCII control characters have fancy unicode glyphs, everything else +// is replaced by a space - unless it is used in combining characters in +// which case we need to leave it in the string. +pub(crate) fn replacement(c: char) -> Option<&'static str> { + if c <= '\x1f' { + Some(C0_SYMBOLS[c as usize]) + } else if c == '\x7f' { + Some(DEL) + } else if contains(c, &PRESERVE) { + None + } else { + Some("\u{2007}") // fixed width space + } +} +// IDEOGRAPHIC SPACE is common alongside Chinese and other wide character sets. +// We don't highlight this for now (as it already shows up wide in the editor), +// but could if we tracked state in the classifier. +const IDEOGRAPHIC_SPACE: char = '\u{3000}'; + +const C0_SYMBOLS: &'static [&'static str] = &[ + "␀", "␁", "␂", "␃", "␄", "␅", "␆", "␇", "␈", "␉", "␊", "␋", "␌", "␍", "␎", "␏", "␐", "␑", "␒", + "␓", "␔", "␕", "␖", "␗", "␘", "␙", "␚", "␛", "␜", "␝", "␞", "␟", +]; +const DEL: &'static str = "␡"; + +// generated using ucd-generate: ucd-generate general-category --include Format --chars ucd-16.0.0 +pub const FORMAT: &'static [(char, char)] = &[ + ('\u{ad}', '\u{ad}'), + ('\u{600}', '\u{605}'), + ('\u{61c}', '\u{61c}'), + ('\u{6dd}', '\u{6dd}'), + ('\u{70f}', '\u{70f}'), + ('\u{890}', '\u{891}'), + ('\u{8e2}', '\u{8e2}'), + ('\u{180e}', '\u{180e}'), + ('\u{200b}', '\u{200f}'), + ('\u{202a}', '\u{202e}'), + ('\u{2060}', '\u{2064}'), + ('\u{2066}', '\u{206f}'), + ('\u{feff}', '\u{feff}'), + ('\u{fff9}', '\u{fffb}'), + ('\u{110bd}', '\u{110bd}'), + ('\u{110cd}', '\u{110cd}'), + ('\u{13430}', '\u{1343f}'), + ('\u{1bca0}', '\u{1bca3}'), + ('\u{1d173}', '\u{1d17a}'), + ('\u{e0001}', '\u{e0001}'), + ('\u{e0020}', '\u{e007f}'), +]; + +// hand-made base on https://invisible-characters.com (Excluding Cf) +pub const OTHER: &'static [(char, char)] = &[ + ('\u{034f}', '\u{034f}'), + ('\u{115F}', '\u{1160}'), + ('\u{17b4}', '\u{17b5}'), + ('\u{180b}', '\u{180d}'), + ('\u{2800}', '\u{2800}'), + ('\u{3164}', '\u{3164}'), + ('\u{fe00}', '\u{fe0d}'), + ('\u{ffa0}', '\u{ffa0}'), + ('\u{fffc}', '\u{fffc}'), + ('\u{e0100}', '\u{e01ef}'), +]; + +// a subset of FORMAT/OTHER that may appear within glyphs +const PRESERVE: &'static [(char, char)] = &[ + ('\u{034f}', '\u{034f}'), + ('\u{200d}', '\u{200d}'), + ('\u{17b4}', '\u{17b5}'), + ('\u{180b}', '\u{180d}'), + ('\u{e0061}', '\u{e007a}'), + ('\u{e007f}', '\u{e007f}'), +]; + +fn contains(c: char, list: &[(char, char)]) -> bool { + for (start, end) in list { + if c < *start { + return false; + } + if c <= *end { + return true; + } + } + false +} diff --git a/crates/editor/src/hover_popover.rs b/crates/editor/src/hover_popover.rs index 9200dd7b8c697c5838fa46e738b19e91289aa83e..fb198c837c3fa5dc7ca9e6eb9cb10c166c7f4cfa 100644 --- a/crates/editor/src/hover_popover.rs +++ b/crates/editor/src/hover_popover.rs @@ -1,5 +1,5 @@ use crate::{ - display_map::{InlayOffset, ToDisplayPoint}, + display_map::{invisibles::is_invisible, InlayOffset, ToDisplayPoint}, hover_links::{InlayHighlight, RangeInEditor}, scroll::ScrollAmount, Anchor, AnchorRangeExt, DisplayPoint, DisplayRow, Editor, EditorSettings, EditorSnapshot, @@ -11,7 +11,7 @@ use gpui::{ StyleRefinement, Styled, Task, TextStyleRefinement, View, ViewContext, }; use itertools::Itertools; -use language::{DiagnosticEntry, Language, LanguageRegistry}; +use language::{Diagnostic, DiagnosticEntry, Language, LanguageRegistry}; use lsp::DiagnosticSeverity; use markdown::{Markdown, MarkdownStyle}; use multi_buffer::ToOffset; @@ -259,7 +259,7 @@ fn show_hover( } // If there's a diagnostic, assign it on the hover state and notify - let local_diagnostic = snapshot + let mut local_diagnostic = snapshot .buffer_snapshot .diagnostics_in_range::<_, usize>(anchor..anchor, false) // Find the entry with the most specific range @@ -280,6 +280,41 @@ fn show_hover( range: entry.range.to_anchors(&snapshot.buffer_snapshot), }) }); + if let Some(invisible) = snapshot + .buffer_snapshot + .chars_at(anchor) + .next() + .filter(|&c| is_invisible(c)) + { + let after = snapshot.buffer_snapshot.anchor_after( + anchor.to_offset(&snapshot.buffer_snapshot) + invisible.len_utf8(), + ); + local_diagnostic = Some(DiagnosticEntry { + diagnostic: Diagnostic { + severity: DiagnosticSeverity::HINT, + message: format!("Unicode character U+{:02X}", invisible as u32), + ..Default::default() + }, + range: anchor..after, + }) + } else if let Some(invisible) = snapshot + .buffer_snapshot + .reversed_chars_at(anchor) + .next() + .filter(|&c| is_invisible(c)) + { + let before = snapshot.buffer_snapshot.anchor_before( + anchor.to_offset(&snapshot.buffer_snapshot) - invisible.len_utf8(), + ); + local_diagnostic = Some(DiagnosticEntry { + diagnostic: Diagnostic { + severity: DiagnosticSeverity::HINT, + message: format!("Unicode character U+{:02X}", invisible as u32), + ..Default::default() + }, + range: before..anchor, + }) + } let diagnostic_popover = if let Some(local_diagnostic) = local_diagnostic { let text = match local_diagnostic.diagnostic.source { diff --git a/crates/gpui/src/text_system/line.rs b/crates/gpui/src/text_system/line.rs index 240654e57e1488301956768485d61b34a5cb56f0..b8b698a0427a68cfe58a405075b593a5ac4c8ab3 100644 --- a/crates/gpui/src/text_system/line.rs +++ b/crates/gpui/src/text_system/line.rs @@ -1,6 +1,7 @@ use crate::{ - black, fill, point, px, size, Bounds, Hsla, LineLayout, Pixels, Point, Result, SharedString, - StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary, WrappedLineLayout, + black, fill, point, px, size, Bounds, Half, Hsla, LineLayout, Pixels, Point, Result, + SharedString, StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary, + WrappedLineLayout, }; use derive_more::{Deref, DerefMut}; use smallvec::SmallVec; @@ -129,8 +130,9 @@ fn paint_line( let text_system = cx.text_system().clone(); let mut glyph_origin = origin; let mut prev_glyph_position = Point::default(); + let mut max_glyph_size = size(px(0.), px(0.)); for (run_ix, run) in layout.runs.iter().enumerate() { - let max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size; + max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size; for (glyph_ix, glyph) in run.glyphs.iter().enumerate() { glyph_origin.x += glyph.position.x - prev_glyph_position.x; @@ -139,6 +141,9 @@ fn paint_line( wraps.next(); if let Some((background_origin, background_color)) = current_background.as_mut() { + if glyph_origin.x == background_origin.x { + background_origin.x -= max_glyph_size.width.half() + } cx.paint_quad(fill( Bounds { origin: *background_origin, @@ -150,6 +155,9 @@ fn paint_line( background_origin.y += line_height; } if let Some((underline_origin, underline_style)) = current_underline.as_mut() { + if glyph_origin.x == underline_origin.x { + underline_origin.x -= max_glyph_size.width.half(); + }; cx.paint_underline( *underline_origin, glyph_origin.x - underline_origin.x, @@ -161,6 +169,9 @@ fn paint_line( if let Some((strikethrough_origin, strikethrough_style)) = current_strikethrough.as_mut() { + if glyph_origin.x == strikethrough_origin.x { + strikethrough_origin.x -= max_glyph_size.width.half(); + }; cx.paint_strikethrough( *strikethrough_origin, glyph_origin.x - strikethrough_origin.x, @@ -179,7 +190,18 @@ fn paint_line( let mut finished_underline: Option<(Point, UnderlineStyle)> = None; let mut finished_strikethrough: Option<(Point, StrikethroughStyle)> = None; if glyph.index >= run_end { - if let Some(style_run) = decoration_runs.next() { + let mut style_run = decoration_runs.next(); + + // ignore style runs that apply to a partial glyph + while let Some(run) = style_run { + if glyph.index < run_end + (run.len as usize) { + break; + } + run_end += run.len as usize; + style_run = decoration_runs.next(); + } + + if let Some(style_run) = style_run { if let Some((_, background_color)) = &mut current_background { if style_run.background_color.as_ref() != Some(background_color) { finished_background = current_background.take(); @@ -239,17 +261,24 @@ fn paint_line( } } - if let Some((background_origin, background_color)) = finished_background { + if let Some((mut background_origin, background_color)) = finished_background { + let mut width = glyph_origin.x - background_origin.x; + if background_origin.x == glyph_origin.x { + background_origin.x -= max_glyph_size.width.half(); + }; cx.paint_quad(fill( Bounds { origin: background_origin, - size: size(glyph_origin.x - background_origin.x, line_height), + size: size(width, line_height), }, background_color, )); } - if let Some((underline_origin, underline_style)) = finished_underline { + if let Some((mut underline_origin, underline_style)) = finished_underline { + if underline_origin.x == glyph_origin.x { + underline_origin.x -= max_glyph_size.width.half(); + }; cx.paint_underline( underline_origin, glyph_origin.x - underline_origin.x, @@ -257,7 +286,12 @@ fn paint_line( ); } - if let Some((strikethrough_origin, strikethrough_style)) = finished_strikethrough { + if let Some((mut strikethrough_origin, strikethrough_style)) = + finished_strikethrough + { + if strikethrough_origin.x == glyph_origin.x { + strikethrough_origin.x -= max_glyph_size.width.half(); + }; cx.paint_strikethrough( strikethrough_origin, glyph_origin.x - strikethrough_origin.x, @@ -299,7 +333,10 @@ fn paint_line( last_line_end_x -= glyph.position.x; } - if let Some((background_origin, background_color)) = current_background.take() { + if let Some((mut background_origin, background_color)) = current_background.take() { + if last_line_end_x == background_origin.x { + background_origin.x -= max_glyph_size.width.half() + }; cx.paint_quad(fill( Bounds { origin: background_origin, @@ -309,7 +346,10 @@ fn paint_line( )); } - if let Some((underline_start, underline_style)) = current_underline.take() { + if let Some((mut underline_start, underline_style)) = current_underline.take() { + if last_line_end_x == underline_start.x { + underline_start.x -= max_glyph_size.width.half() + }; cx.paint_underline( underline_start, last_line_end_x - underline_start.x, @@ -317,7 +357,10 @@ fn paint_line( ); } - if let Some((strikethrough_start, strikethrough_style)) = current_strikethrough.take() { + if let Some((mut strikethrough_start, strikethrough_style)) = current_strikethrough.take() { + if last_line_end_x == strikethrough_start.x { + strikethrough_start.x -= max_glyph_size.width.half() + }; cx.paint_strikethrough( strikethrough_start, last_line_end_x - strikethrough_start.x,