wrap.go

  1package ansi
  2
  3import (
  4	"bytes"
  5	"unicode"
  6	"unicode/utf8"
  7
  8	"github.com/charmbracelet/x/ansi/parser"
  9	"github.com/mattn/go-runewidth"
 10	"github.com/rivo/uniseg"
 11)
 12
 13// nbsp is a non-breaking space
 14const nbsp = 0xA0
 15
 16// Hardwrap wraps a string or a block of text to a given line length, breaking
 17// word boundaries. This will preserve ANSI escape codes and will account for
 18// wide-characters in the string.
 19// When preserveSpace is true, spaces at the beginning of a line will be
 20// preserved.
 21// This treats the text as a sequence of graphemes.
 22func Hardwrap(s string, limit int, preserveSpace bool) string {
 23	return hardwrap(GraphemeWidth, s, limit, preserveSpace)
 24}
 25
 26// HardwrapWc wraps a string or a block of text to a given line length, breaking
 27// word boundaries. This will preserve ANSI escape codes and will account for
 28// wide-characters in the string.
 29// When preserveSpace is true, spaces at the beginning of a line will be
 30// preserved.
 31// This treats the text as a sequence of wide characters and runes.
 32func HardwrapWc(s string, limit int, preserveSpace bool) string {
 33	return hardwrap(WcWidth, s, limit, preserveSpace)
 34}
 35
 36func hardwrap(m Method, s string, limit int, preserveSpace bool) string {
 37	if limit < 1 {
 38		return s
 39	}
 40
 41	var (
 42		cluster      []byte
 43		buf          bytes.Buffer
 44		curWidth     int
 45		forceNewline bool
 46		pstate       = parser.GroundState // initial state
 47		b            = []byte(s)
 48	)
 49
 50	addNewline := func() {
 51		buf.WriteByte('\n')
 52		curWidth = 0
 53	}
 54
 55	i := 0
 56	for i < len(b) {
 57		state, action := parser.Table.Transition(pstate, b[i])
 58		if state == parser.Utf8State {
 59			var width int
 60			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
 61			if m == WcWidth {
 62				width = runewidth.StringWidth(string(cluster))
 63			}
 64			i += len(cluster)
 65
 66			if curWidth+width > limit {
 67				addNewline()
 68			}
 69			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
 70				// Skip spaces at the beginning of a line
 71				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
 72					pstate = parser.GroundState
 73					continue
 74				}
 75			}
 76
 77			buf.Write(cluster)
 78			curWidth += width
 79			pstate = parser.GroundState
 80			continue
 81		}
 82
 83		switch action {
 84		case parser.PrintAction, parser.ExecuteAction:
 85			if b[i] == '\n' {
 86				addNewline()
 87				forceNewline = false
 88				break
 89			}
 90
 91			if curWidth+1 > limit {
 92				addNewline()
 93				forceNewline = true
 94			}
 95
 96			// Skip spaces at the beginning of a line
 97			if curWidth == 0 {
 98				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
 99					break
100				}
101				forceNewline = false
102			}
103
104			buf.WriteByte(b[i])
105			if action == parser.PrintAction {
106				curWidth++
107			}
108		default:
109			buf.WriteByte(b[i])
110		}
111
112		// We manage the UTF8 state separately manually above.
113		if pstate != parser.Utf8State {
114			pstate = state
115		}
116		i++
117	}
118
119	return buf.String()
120}
121
122// Wordwrap wraps a string or a block of text to a given line length, not
123// breaking word boundaries. This will preserve ANSI escape codes and will
124// account for wide-characters in the string.
125// The breakpoints string is a list of characters that are considered
126// breakpoints for word wrapping. A hyphen (-) is always considered a
127// breakpoint.
128//
129// Note: breakpoints must be a string of 1-cell wide rune characters.
130//
131// This treats the text as a sequence of graphemes.
132func Wordwrap(s string, limit int, breakpoints string) string {
133	return wordwrap(GraphemeWidth, s, limit, breakpoints)
134}
135
136// WordwrapWc wraps a string or a block of text to a given line length, not
137// breaking word boundaries. This will preserve ANSI escape codes and will
138// account for wide-characters in the string.
139// The breakpoints string is a list of characters that are considered
140// breakpoints for word wrapping. A hyphen (-) is always considered a
141// breakpoint.
142//
143// Note: breakpoints must be a string of 1-cell wide rune characters.
144//
145// This treats the text as a sequence of wide characters and runes.
146func WordwrapWc(s string, limit int, breakpoints string) string {
147	return wordwrap(WcWidth, s, limit, breakpoints)
148}
149
150func wordwrap(m Method, s string, limit int, breakpoints string) string {
151	if limit < 1 {
152		return s
153	}
154
155	var (
156		cluster  []byte
157		buf      bytes.Buffer
158		word     bytes.Buffer
159		space    bytes.Buffer
160		curWidth int
161		wordLen  int
162		pstate   = parser.GroundState // initial state
163		b        = []byte(s)
164	)
165
166	addSpace := func() {
167		curWidth += space.Len()
168		buf.Write(space.Bytes())
169		space.Reset()
170	}
171
172	addWord := func() {
173		if word.Len() == 0 {
174			return
175		}
176
177		addSpace()
178		curWidth += wordLen
179		buf.Write(word.Bytes())
180		word.Reset()
181		wordLen = 0
182	}
183
184	addNewline := func() {
185		buf.WriteByte('\n')
186		curWidth = 0
187		space.Reset()
188	}
189
190	i := 0
191	for i < len(b) {
192		state, action := parser.Table.Transition(pstate, b[i])
193		if state == parser.Utf8State {
194			var width int
195			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
196			if m == WcWidth {
197				width = runewidth.StringWidth(string(cluster))
198			}
199			i += len(cluster)
200
201			r, _ := utf8.DecodeRune(cluster)
202			if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
203				addWord()
204				space.WriteRune(r)
205			} else if bytes.ContainsAny(cluster, breakpoints) {
206				addSpace()
207				addWord()
208				buf.Write(cluster)
209				curWidth++
210			} else {
211				word.Write(cluster)
212				wordLen += width
213				if curWidth+space.Len()+wordLen > limit &&
214					wordLen < limit {
215					addNewline()
216				}
217			}
218
219			pstate = parser.GroundState
220			continue
221		}
222
223		switch action {
224		case parser.PrintAction, parser.ExecuteAction:
225			r := rune(b[i])
226			switch {
227			case r == '\n':
228				if wordLen == 0 {
229					if curWidth+space.Len() > limit {
230						curWidth = 0
231					} else {
232						buf.Write(space.Bytes())
233					}
234					space.Reset()
235				}
236
237				addWord()
238				addNewline()
239			case unicode.IsSpace(r):
240				addWord()
241				space.WriteByte(b[i])
242			case r == '-':
243				fallthrough
244			case runeContainsAny(r, breakpoints):
245				addSpace()
246				addWord()
247				buf.WriteByte(b[i])
248				curWidth++
249			default:
250				word.WriteByte(b[i])
251				wordLen++
252				if curWidth+space.Len()+wordLen > limit &&
253					wordLen < limit {
254					addNewline()
255				}
256			}
257
258		default:
259			word.WriteByte(b[i])
260		}
261
262		// We manage the UTF8 state separately manually above.
263		if pstate != parser.Utf8State {
264			pstate = state
265		}
266		i++
267	}
268
269	addWord()
270
271	return buf.String()
272}
273
274// Wrap wraps a string or a block of text to a given line length, breaking word
275// boundaries if necessary. This will preserve ANSI escape codes and will
276// account for wide-characters in the string. The breakpoints string is a list
277// of characters that are considered breakpoints for word wrapping. A hyphen
278// (-) is always considered a breakpoint.
279//
280// Note: breakpoints must be a string of 1-cell wide rune characters.
281//
282// This treats the text as a sequence of graphemes.
283func Wrap(s string, limit int, breakpoints string) string {
284	return wrap(GraphemeWidth, s, limit, breakpoints)
285}
286
287// WrapWc wraps a string or a block of text to a given line length, breaking word
288// boundaries if necessary. This will preserve ANSI escape codes and will
289// account for wide-characters in the string. The breakpoints string is a list
290// of characters that are considered breakpoints for word wrapping. A hyphen
291// (-) is always considered a breakpoint.
292//
293// Note: breakpoints must be a string of 1-cell wide rune characters.
294//
295// This treats the text as a sequence of wide characters and runes.
296func WrapWc(s string, limit int, breakpoints string) string {
297	return wrap(WcWidth, s, limit, breakpoints)
298}
299
300func wrap(m Method, s string, limit int, breakpoints string) string {
301	if limit < 1 {
302		return s
303	}
304
305	var (
306		cluster    []byte
307		buf        bytes.Buffer
308		word       bytes.Buffer
309		space      bytes.Buffer
310		spaceWidth int                  // width of the space buffer
311		curWidth   int                  // written width of the line
312		wordLen    int                  // word buffer len without ANSI escape codes
313		pstate     = parser.GroundState // initial state
314		b          = []byte(s)
315	)
316
317	addSpace := func() {
318		curWidth += spaceWidth
319		buf.Write(space.Bytes())
320		space.Reset()
321		spaceWidth = 0
322	}
323
324	addWord := func() {
325		if word.Len() == 0 {
326			return
327		}
328
329		addSpace()
330		curWidth += wordLen
331		buf.Write(word.Bytes())
332		word.Reset()
333		wordLen = 0
334	}
335
336	addNewline := func() {
337		buf.WriteByte('\n')
338		curWidth = 0
339		space.Reset()
340		spaceWidth = 0
341	}
342
343	i := 0
344	for i < len(b) {
345		state, action := parser.Table.Transition(pstate, b[i])
346		if state == parser.Utf8State {
347			var width int
348			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
349			if m == WcWidth {
350				width = runewidth.StringWidth(string(cluster))
351			}
352			i += len(cluster)
353
354			r, _ := utf8.DecodeRune(cluster)
355			switch {
356			case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
357				addWord()
358				space.WriteRune(r)
359				spaceWidth += width
360			case bytes.ContainsAny(cluster, breakpoints):
361				addSpace()
362				if curWidth+wordLen+width > limit {
363					word.Write(cluster)
364					wordLen += width
365				} else {
366					addWord()
367					buf.Write(cluster)
368					curWidth += width
369				}
370			default:
371				if wordLen+width > limit {
372					// Hardwrap the word if it's too long
373					addWord()
374				}
375
376				word.Write(cluster)
377				wordLen += width
378
379				if curWidth+wordLen+spaceWidth > limit {
380					addNewline()
381				}
382			}
383
384			pstate = parser.GroundState
385			continue
386		}
387
388		switch action {
389		case parser.PrintAction, parser.ExecuteAction:
390			switch r := rune(b[i]); {
391			case r == '\n':
392				if wordLen == 0 {
393					if curWidth+spaceWidth > limit {
394						curWidth = 0
395					} else {
396						// preserve whitespaces
397						buf.Write(space.Bytes())
398					}
399					space.Reset()
400					spaceWidth = 0
401				}
402
403				addWord()
404				addNewline()
405			case unicode.IsSpace(r):
406				addWord()
407				space.WriteRune(r)
408				spaceWidth++
409			case r == '-':
410				fallthrough
411			case runeContainsAny(r, breakpoints):
412				addSpace()
413				if curWidth+wordLen >= limit {
414					// We can't fit the breakpoint in the current line, treat
415					// it as part of the word.
416					word.WriteRune(r)
417					wordLen++
418				} else {
419					addWord()
420					buf.WriteRune(r)
421					curWidth++
422				}
423			default:
424				if curWidth == limit {
425					addNewline()
426				}
427				word.WriteRune(r)
428				wordLen++
429
430				if wordLen == limit {
431					// Hardwrap the word if it's too long
432					addWord()
433				}
434
435				if curWidth+wordLen+spaceWidth > limit {
436					addNewline()
437				}
438			}
439
440		default:
441			word.WriteByte(b[i])
442		}
443
444		// We manage the UTF8 state separately manually above.
445		if pstate != parser.Utf8State {
446			pstate = state
447		}
448		i++
449	}
450
451	if wordLen == 0 {
452		if curWidth+spaceWidth > limit {
453			curWidth = 0
454		} else {
455			// preserve whitespaces
456			buf.Write(space.Bytes())
457		}
458		space.Reset()
459		spaceWidth = 0
460	}
461
462	addWord()
463
464	return buf.String()
465}
466
// runeContainsAny reports whether r is one of the runes obtained by ranging
// over s. It returns false for an empty s.
func runeContainsAny(r rune, s string) bool {
	for _, candidate := range s {
		if candidate != r {
			continue
		}
		return true
	}
	return false
}