line_wrapper.rs

  1use crate::{px, FontId, FontRun, Pixels, PlatformTextSystem};
  2use collections::HashMap;
  3use std::{iter, sync::Arc};
  4
  5/// The GPUI line wrapper, used to wrap lines of text to a given width.
  6pub struct LineWrapper {
  7    platform_text_system: Arc<dyn PlatformTextSystem>,
  8    pub(crate) font_id: FontId,
  9    pub(crate) font_size: Pixels,
 10    cached_ascii_char_widths: [Option<Pixels>; 128],
 11    cached_other_char_widths: HashMap<char, Pixels>,
 12}
 13
 14impl LineWrapper {
 15    /// The maximum indent that can be applied to a line.
 16    pub const MAX_INDENT: u32 = 256;
 17
 18    pub(crate) fn new(
 19        font_id: FontId,
 20        font_size: Pixels,
 21        text_system: Arc<dyn PlatformTextSystem>,
 22    ) -> Self {
 23        Self {
 24            platform_text_system: text_system,
 25            font_id,
 26            font_size,
 27            cached_ascii_char_widths: [None; 128],
 28            cached_other_char_widths: HashMap::default(),
 29        }
 30    }
 31
 32    /// Wrap a line of text to the given width with this wrapper's font and font size.
 33    pub fn wrap_line<'a>(
 34        &'a mut self,
 35        line: &'a str,
 36        wrap_width: Pixels,
 37    ) -> impl Iterator<Item = Boundary> + 'a {
 38        let mut width = px(0.);
 39        let mut first_non_whitespace_ix = None;
 40        let mut indent = None;
 41        let mut last_candidate_ix = 0;
 42        let mut last_candidate_width = px(0.);
 43        let mut last_wrap_ix = 0;
 44        let mut prev_c = '\0';
 45        let mut char_indices = line.char_indices();
 46        iter::from_fn(move || {
 47            for (ix, c) in char_indices.by_ref() {
 48                if c == '\n' {
 49                    continue;
 50                }
 51
 52                if Self::is_word_char(c) {
 53                    if prev_c == ' ' && c != ' ' && first_non_whitespace_ix.is_some() {
 54                        last_candidate_ix = ix;
 55                        last_candidate_width = width;
 56                    }
 57                } else {
 58                    // CJK may not be space separated, e.g.: `Hello worldδ½ ε₯½δΈ–η•Œ`
 59                    if c != ' ' && first_non_whitespace_ix.is_some() {
 60                        last_candidate_ix = ix;
 61                        last_candidate_width = width;
 62                    }
 63                }
 64
 65                if c != ' ' && first_non_whitespace_ix.is_none() {
 66                    first_non_whitespace_ix = Some(ix);
 67                }
 68
 69                let char_width = self.width_for_char(c);
 70                width += char_width;
 71                if width > wrap_width && ix > last_wrap_ix {
 72                    if let (None, Some(first_non_whitespace_ix)) = (indent, first_non_whitespace_ix)
 73                    {
 74                        indent = Some(
 75                            Self::MAX_INDENT.min((first_non_whitespace_ix - last_wrap_ix) as u32),
 76                        );
 77                    }
 78
 79                    if last_candidate_ix > 0 {
 80                        last_wrap_ix = last_candidate_ix;
 81                        width -= last_candidate_width;
 82                        last_candidate_ix = 0;
 83                    } else {
 84                        last_wrap_ix = ix;
 85                        width = char_width;
 86                    }
 87
 88                    if let Some(indent) = indent {
 89                        width += self.width_for_char(' ') * indent as f32;
 90                    }
 91
 92                    return Some(Boundary::new(last_wrap_ix, indent.unwrap_or(0)));
 93                }
 94                prev_c = c;
 95            }
 96
 97            None
 98        })
 99    }
100
101    pub(crate) fn is_word_char(c: char) -> bool {
102        // ASCII alphanumeric characters, for English, numbers: `Hello123`, etc.
103        c.is_ascii_alphanumeric() ||
104        // Latin script in Unicode for French, German, Spanish, etc.
105        // Latin-1 Supplement
106        // https://en.wikipedia.org/wiki/Latin-1_Supplement
107        matches!(c, '\u{00C0}'..='\u{00FF}') ||
108        // Latin Extended-A
109        // https://en.wikipedia.org/wiki/Latin_Extended-A
110        matches!(c, '\u{0100}'..='\u{017F}') ||
111        // Latin Extended-B
112        // https://en.wikipedia.org/wiki/Latin_Extended-B
113        matches!(c, '\u{0180}'..='\u{024F}') ||
114        // Cyrillic for Russian, Ukrainian, etc.
115        // https://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode
116        matches!(c, '\u{0400}'..='\u{04FF}') ||
117        // Some other known special characters that should be treated as word characters,
118        // e.g. `a-b`, `var_name`, `I'm`, '@mention`, `#hashtag`, `100%`, `3.1415`, `2^3`, `a~b`, etc.
119        matches!(c, '-' | '_' | '.' | '\'' | '$' | '%' | '@' | '#' | '^' | '~') ||
120        // Characters that used in URL, e.g. `https://github.com/zed-industries/zed?a=1&b=2` for better wrapping a long URL.
121        matches!(c,  '/' | ':' | '?' | '&' | '=') ||
122        // `β‹―` character is special used in Zed, to keep this at the end of the line.
123        matches!(c, 'β‹―')
124    }
125
126    #[inline(always)]
127    fn width_for_char(&mut self, c: char) -> Pixels {
128        if (c as u32) < 128 {
129            if let Some(cached_width) = self.cached_ascii_char_widths[c as usize] {
130                cached_width
131            } else {
132                let width = self.compute_width_for_char(c);
133                self.cached_ascii_char_widths[c as usize] = Some(width);
134                width
135            }
136        } else if let Some(cached_width) = self.cached_other_char_widths.get(&c) {
137            *cached_width
138        } else {
139            let width = self.compute_width_for_char(c);
140            self.cached_other_char_widths.insert(c, width);
141            width
142        }
143    }
144
145    fn compute_width_for_char(&self, c: char) -> Pixels {
146        let mut buffer = [0; 4];
147        let buffer = c.encode_utf8(&mut buffer);
148        self.platform_text_system
149            .layout_line(
150                buffer,
151                self.font_size,
152                &[FontRun {
153                    len: buffer.len(),
154                    font_id: self.font_id,
155                }],
156            )
157            .width
158    }
159}
160
/// A boundary between two lines of text.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Boundary {
    /// The byte offset into the wrapped text at which the next line begins.
    pub ix: usize,
    /// The indent of the next line, counted in spaces.
    pub next_indent: u32,
}

impl Boundary {
    /// Creates a boundary at byte offset `ix` whose following line is indented by
    /// `next_indent` spaces.
    fn new(ix: usize, next_indent: u32) -> Self {
        Self { ix, next_indent }
    }
}
175
176#[cfg(test)]
177mod tests {
178    use super::*;
179    use crate::{font, TestAppContext, TestDispatcher};
180    #[cfg(target_os = "macos")]
181    use crate::{TextRun, WindowTextSystem, WrapBoundary};
182    use rand::prelude::*;
183
184    #[test]
185    fn test_wrap_line() {
186        let dispatcher = TestDispatcher::new(StdRng::seed_from_u64(0));
187        let cx = TestAppContext::new(dispatcher, None);
188        cx.text_system()
189            .add_fonts(vec![std::fs::read(
190                "../../assets/fonts/plex-mono/ZedPlexMono-Regular.ttf",
191            )
192            .unwrap()
193            .into()])
194            .unwrap();
195        let id = cx.text_system().font_id(&font("Zed Plex Mono")).unwrap();
196
197        cx.update(|cx| {
198            let text_system = cx.text_system().clone();
199            let mut wrapper =
200                LineWrapper::new(id, px(16.), text_system.platform_text_system.clone());
201            assert_eq!(
202                wrapper
203                    .wrap_line("aa bbb cccc ddddd eeee", px(72.))
204                    .collect::<Vec<_>>(),
205                &[
206                    Boundary::new(7, 0),
207                    Boundary::new(12, 0),
208                    Boundary::new(18, 0)
209                ],
210            );
211            assert_eq!(
212                wrapper
213                    .wrap_line("aaa aaaaaaaaaaaaaaaaaa", px(72.0))
214                    .collect::<Vec<_>>(),
215                &[
216                    Boundary::new(4, 0),
217                    Boundary::new(11, 0),
218                    Boundary::new(18, 0)
219                ],
220            );
221            assert_eq!(
222                wrapper
223                    .wrap_line("     aaaaaaa", px(72.))
224                    .collect::<Vec<_>>(),
225                &[
226                    Boundary::new(7, 5),
227                    Boundary::new(9, 5),
228                    Boundary::new(11, 5),
229                ]
230            );
231            assert_eq!(
232                wrapper
233                    .wrap_line("                            ", px(72.))
234                    .collect::<Vec<_>>(),
235                &[
236                    Boundary::new(7, 0),
237                    Boundary::new(14, 0),
238                    Boundary::new(21, 0)
239                ]
240            );
241            assert_eq!(
242                wrapper
243                    .wrap_line("          aaaaaaaaaaaaaa", px(72.))
244                    .collect::<Vec<_>>(),
245                &[
246                    Boundary::new(7, 0),
247                    Boundary::new(14, 3),
248                    Boundary::new(18, 3),
249                    Boundary::new(22, 3),
250                ]
251            );
252        });
253    }
254
255    #[test]
256    fn test_is_word_char() {
257        #[track_caller]
258        fn assert_word(word: &str) {
259            for c in word.chars() {
260                assert!(LineWrapper::is_word_char(c), "assertion failed for '{}'", c);
261            }
262        }
263
264        #[track_caller]
265        fn assert_not_word(word: &str) {
266            let found = word.chars().any(|c| !LineWrapper::is_word_char(c));
267            assert!(found, "assertion failed for '{}'", word);
268        }
269
270        assert_word("Hello123");
271        assert_word("non-English");
272        assert_word("var_name");
273        assert_word("123456");
274        assert_word("3.1415");
275        assert_word("10^2");
276        assert_word("1~2");
277        assert_word("100%");
278        assert_word("@mention");
279        assert_word("#hashtag");
280        assert_word("$variable");
281        assert_word("moreβ‹―");
282
283        // Space
284        assert_not_word("foo bar");
285
286        // URL case
287        assert_word("https://github.com/zed-industries/zed/");
288        assert_word("github.com");
289        assert_word("a=1&b=2");
290
291        // Latin-1 Supplement
292        assert_word("Γ€ΓΓ‚ΓƒΓ„Γ…Γ†Γ‡ΓˆΓ‰ΓŠΓ‹ΓŒΓΓŽΓ");
293        // Latin Extended-A
294        assert_word("Δ€ΔΔ‚ΔƒΔ„Δ…Δ†Δ‡ΔˆΔ‰ΔŠΔ‹ΔŒΔΔŽΔ");
295        // Latin Extended-B
296        assert_word("Ζ€ΖΖ‚ΖƒΖ„Ζ…Ζ†Ζ‡ΖˆΖ‰ΖŠΖ‹ΖŒΖΖŽΖ");
297        // Cyrillic
298        assert_word("ΠΠ‘Π’Π“Π”Π•Π–Π—Π˜Π™ΠšΠ›ΠœΠΠžΠŸ");
299
300        // non-word characters
301        assert_not_word("δ½ ε₯½");
302        assert_not_word("μ•ˆλ…•ν•˜μ„Έμš”");
303        assert_not_word("こんにけは");
304        assert_not_word("πŸ˜€πŸ˜πŸ˜‚");
305        assert_not_word("()[]{}<>");
306    }
307
308    // For compatibility with the test macro
309    #[cfg(target_os = "macos")]
310    use crate as gpui;
311
312    // These seem to vary wildly based on the the text system.
313    #[cfg(target_os = "macos")]
314    #[crate::test]
315    fn test_wrap_shaped_line(cx: &mut TestAppContext) {
316        cx.update(|cx| {
317            let text_system = WindowTextSystem::new(cx.text_system().clone());
318
319            let normal = TextRun {
320                len: 0,
321                font: font("Helvetica"),
322                color: Default::default(),
323                underline: Default::default(),
324                strikethrough: None,
325                background_color: None,
326            };
327            let bold = TextRun {
328                len: 0,
329                font: font("Helvetica").bold(),
330                color: Default::default(),
331                underline: Default::default(),
332                strikethrough: None,
333                background_color: None,
334            };
335
336            impl TextRun {
337                fn with_len(&self, len: usize) -> Self {
338                    let mut this = self.clone();
339                    this.len = len;
340                    this
341                }
342            }
343
344            let text = "aa bbb cccc ddddd eeee".into();
345            let lines = text_system
346                .shape_text(
347                    text,
348                    px(16.),
349                    &[
350                        normal.with_len(4),
351                        bold.with_len(5),
352                        normal.with_len(6),
353                        bold.with_len(1),
354                        normal.with_len(7),
355                    ],
356                    Some(px(72.)),
357                )
358                .unwrap();
359
360            assert_eq!(
361                lines[0].layout.wrap_boundaries(),
362                &[
363                    WrapBoundary {
364                        run_ix: 1,
365                        glyph_ix: 3
366                    },
367                    WrapBoundary {
368                        run_ix: 2,
369                        glyph_ix: 3
370                    },
371                    WrapBoundary {
372                        run_ix: 4,
373                        glyph_ix: 2
374                    }
375                ],
376            );
377        });
378    }
379}