text.go

  1package text
  2
  3import (
  4	"github.com/mattn/go-runewidth"
  5	"strings"
  6	"unicode/utf8"
  7)
  8
  9// Force runewidth not to treat ambiguous runes as wide chars, so that things
 10// like unicode ellipsis/up/down/left/right glyphs can have correct runewidth
 11// and can be displayed correctly in terminals.
 12func init() {
 13	runewidth.DefaultCondition.EastAsianWidth = false
 14}
 15
 16// Wrap a text for an exact line size
 17// Handle properly terminal color escape code
 18func Wrap(text string, lineWidth int) (string, int) {
 19	return WrapLeftPadded(text, lineWidth, 0)
 20}
 21
 22// Wrap a text for an exact line size with a left padding
 23// Handle properly terminal color escape code
 24func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
 25	var lines []string
 26	nbLine := 0
 27	pad := strings.Repeat(" ", leftPad)
 28
 29	// tabs are formatted as 4 spaces
 30	text = strings.Replace(text, "\t", "    ", -1)
 31	// NOTE: text is first segmented into lines so that softwrapLine can handle.
 32	for _, line := range strings.Split(text, "\n") {
 33		if line == "" || strings.TrimSpace(line) == "" {
 34			lines = append(lines, "")
 35			nbLine++
 36		} else {
 37			wrapped := softwrapLine(line, lineWidth-leftPad)
 38			firstLine := true
 39			for _, seg := range strings.Split(wrapped, "\n") {
 40				if firstLine {
 41					lines = append(lines, pad+strings.TrimRight(seg, " "))
 42					firstLine = false
 43				} else {
 44					lines = append(lines, pad+strings.TrimSpace(seg))
 45				}
 46				nbLine++
 47			}
 48		}
 49	}
 50	return strings.Join(lines, "\n"), nbLine
 51}
 52
 53// Break a line into several lines so that each line consumes at most
 54// 'textWidth' cells.  Lines break at groups of white spaces and multibyte
 55// chars. Nothing is removed from the original text so that it behaves like a
 56// softwrap.
 57//
 58// Required: The line shall not contain '\n'
 59//
 60// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
 61// breaks ("\n") are inserted between these groups so that the total length
 62// between breaks does not exceed the required width. Words that are longer than
 63// the textWidth are broken into pieces no longer than textWidth.
 64//
 65func softwrapLine(line string, textWidth int) string {
 66	// NOTE: terminal escapes are stripped out of the line so the algorithm is
 67	// simpler. Do not try to mix them in the wrapping algorithm, as it can get
 68	// complicated quickly.
 69	line1, termEscapes := extractTermEscapes(line)
 70
 71	chunks := segmentLine(line1)
 72	// Reverse the chunk array so we can use it as a stack.
 73	for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
 74		chunks[i], chunks[j] = chunks[j], chunks[i]
 75	}
 76	var line2 string = ""
 77	var width int = 0
 78	for len(chunks) > 0 {
 79		thisWord := chunks[len(chunks)-1]
 80		wl := wordLen(thisWord)
 81		if width+wl <= textWidth {
 82			line2 += chunks[len(chunks)-1]
 83			chunks = chunks[:len(chunks)-1]
 84			width += wl
 85			if width == textWidth && len(chunks) > 0 {
 86				// NOTE: new line begins when current line is full and there are more
 87				// chunks to come.
 88				line2 += "\n"
 89				width = 0
 90			}
 91		} else if wl > textWidth {
 92			// NOTE: By default, long words are splited to fill the remaining space.
 93			// But if the long words is the first non-space word in the middle of the
 94			// line, preceeding spaces shall not be counted in word spliting.
 95			splitWidth := textWidth - width
 96			if strings.HasSuffix(line2, "\n"+strings.Repeat(" ", width)) {
 97				splitWidth += width
 98			}
 99			left, right := splitWord(chunks[len(chunks)-1], splitWidth)
100			chunks[len(chunks)-1] = right
101			line2 += left + "\n"
102			width = 0
103		} else {
104			line2 += "\n"
105			width = 0
106		}
107	}
108
109	line3 := applyTermEscapes(line2, termEscapes)
110	return line3
111}
112
// escapeItem is the storage of one terminal escape found in a line. 'item' is
// the actual escape command, and 'pos' is the index in the rune array where
// the 'item' shall be inserted back. For example, the escape item in
// "F\x1b33mox" is {"\x1b33m", 1}.
type escapeItem struct {
	item string // the escape sequence itself, starting with '\x1b'
	pos  int    // rune index, in the line without escapes, where item goes back
}
121
122// Extract terminal escapes out of a line, returns a new line without terminal
123// escapes and a slice of escape items. The terminal escapes can be inserted
124// back into the new line at rune index 'item.pos' to recover the original line.
125//
126// Required: The line shall not contain "\n"
127//
128func extractTermEscapes(line string) (string, []escapeItem) {
129	var termEscapes []escapeItem
130	var line1 string
131
132	pos := 0
133	item := ""
134	occupiedRuneCount := 0
135	inEscape := false
136	for i, r := range []rune(line) {
137		if r == '\x1b' {
138			pos = i
139			item = string(r)
140			inEscape = true
141			continue
142		}
143		if inEscape {
144			item += string(r)
145			if r == 'm' {
146				termEscapes = append(termEscapes, escapeItem{item, pos - occupiedRuneCount})
147				occupiedRuneCount += utf8.RuneCountInString(item)
148				inEscape = false
149			}
150			continue
151		}
152		line1 += string(r)
153	}
154
155	return line1, termEscapes
156}
157
158// Apply the extracted terminal escapes to the edited line. The only edit
159// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
160// this since this function is not able to check it.
161func applyTermEscapes(line string, escapes []escapeItem) string {
162	if len(escapes) == 0 {
163		return line
164	}
165
166	var out string = ""
167
168	currPos := 0
169	currItem := 0
170	for _, r := range line {
171		if currItem < len(escapes) && currPos == escapes[currItem].pos {
172			// NOTE: We avoid terminal escapes at the end of a line by move them one
173			// pass the end of line, so that algorithms who trim right spaces are
174			// happy. But algorithms who trim left spaces are still unhappy.
175			if r == '\n' {
176				out += "\n" + escapes[currItem].item
177			} else {
178				out += escapes[currItem].item + string(r)
179				currPos++
180			}
181			currItem++
182		} else {
183			if r != '\n' {
184				currPos++
185			}
186			out += string(r)
187		}
188	}
189
190	// Don't forget the trailing escape, if any.
191	if currItem == len(escapes)-1 && currPos == escapes[currItem].pos {
192		out += escapes[currItem].item
193	}
194
195	return out
196}
197
198// Segment a line into chunks, where each chunk consists of chars with the same
199// type and is not breakable.
200func segmentLine(s string) []string {
201	var chunks []string
202
203	var word string
204	wordType := none
205	flushWord := func() {
206		chunks = append(chunks, word)
207		word = ""
208		wordType = none
209	}
210
211	for _, r := range s {
212		// A WIDE_CHAR itself constitutes a chunk.
213		thisType := runeType(r)
214		if thisType == wideChar {
215			if wordType != none {
216				flushWord()
217			}
218			chunks = append(chunks, string(r))
219			continue
220		}
221		// Other type of chunks starts with a char of that type, and ends with a
222		// char with different type or end of string.
223		if thisType != wordType {
224			if wordType != none {
225				flushWord()
226			}
227			word = string(r)
228			wordType = thisType
229		} else {
230			word += string(r)
231		}
232	}
233	if word != "" {
234		flushWord()
235	}
236
237	return chunks
238}
239
// Rune categories
//
// These categories are defined so that each category forms a non-breakable
// chunk. They ARE NOT the same as unicode code point categories. See runeType
// for how a rune is assigned to a category.
const (
	none         int = iota // no category; used by segmentLine when no chunk is in progress
	wideChar                // rune whose runewidth is greater than 1
	invisible               // rune whose runewidth is 0
	shortUnicode            // rune above 127 that is neither wide nor invisible
	space                   // the ' ' rune
	visibleAscii            // every remaining rune
)
253
254// Determine the category of a rune.
255func runeType(r rune) int {
256	rw := runewidth.RuneWidth(r)
257	if rw > 1 {
258		return wideChar
259	} else if rw == 0 {
260		return invisible
261	} else if r > 127 {
262		return shortUnicode
263	} else if r == ' ' {
264		return space
265	} else {
266		return visibleAscii
267	}
268}
269
270// wordLen return the length of a word, while ignoring the terminal escape
271// sequences
272func wordLen(word string) int {
273	length := 0
274	escape := false
275
276	for _, char := range word {
277		if char == '\x1b' {
278			escape = true
279		}
280		if !escape {
281			length += runewidth.RuneWidth(rune(char))
282		}
283		if char == 'm' {
284			escape = false
285		}
286	}
287
288	return length
289}
290
291// splitWord split a word at the given length, while ignoring the terminal escape sequences
292func splitWord(word string, length int) (string, string) {
293	runes := []rune(word)
294	var result []rune
295	added := 0
296	escape := false
297
298	if length == 0 {
299		return "", word
300	}
301
302	for _, r := range runes {
303		if r == '\x1b' {
304			escape = true
305		}
306
307		width := runewidth.RuneWidth(r)
308		if width+added > length {
309			// wide character made the length overflow
310			break
311		}
312
313		result = append(result, r)
314
315		if !escape {
316			added += width
317			if added >= length {
318				break
319			}
320		}
321
322		if r == 'm' {
323			escape = false
324		}
325	}
326
327	leftover := runes[len(result):]
328
329	return string(result), string(leftover)
330}