text.go

  1package text
  2
  3import (
  4	"github.com/mattn/go-runewidth"
  5	"strings"
  6	"unicode/utf8"
  7)
  8
  9// Wrap a text for an exact line size
 10// Handle properly terminal color escape code
 11func Wrap(text string, lineWidth int) (string, int) {
 12	return WrapLeftPadded(text, lineWidth, 0)
 13}
 14
 15// Wrap a text for an exact line size with a left padding
 16// Handle properly terminal color escape code
 17func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
 18	var lines []string
 19	nbLine := 0
 20	pad := strings.Repeat(" ", leftPad)
 21
 22	// tabs are formatted as 4 spaces
 23	text = strings.Replace(text, "\t", "    ", -1)
 24	// NOTE: text is first segmented into lines so that softwrapLine can handle.
 25	for _, line := range strings.Split(text, "\n") {
 26		if line == "" || strings.TrimSpace(line) == "" {
 27			lines = append(lines, "")
 28			nbLine++
 29		} else {
 30			wrapped := softwrapLine(line, lineWidth-leftPad)
 31			firstLine := true
 32			for _, seg := range strings.Split(wrapped, "\n") {
 33				if firstLine {
 34					lines = append(lines, pad+strings.TrimRight(seg, " "))
 35					firstLine = false
 36				} else {
 37					lines = append(lines, pad+strings.TrimSpace(seg))
 38				}
 39				nbLine++
 40			}
 41		}
 42	}
 43	return strings.Join(lines, "\n"), nbLine
 44}
 45
 46// Break a line into several lines so that each line consumes at most
 47// 'textWidth' cells.  Lines break at groups of white spaces and multibyte
 48// chars. Nothing is removed from the original text so that it behaves like a
 49// softwrap.
 50//
 51// Required: The line shall not contain '\n'
 52//
 53// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
 54// breaks ("\n") are inserted between these groups so that the total length
 55// between breaks does not exceed the required width. Words that are longer than
 56// the textWidth are broen into pieces no longer than textWidth.
 57//
 58func softwrapLine(line string, textWidth int) string {
 59	// NOTE: terminal escapes are stripped out of the line so the algorithm is
 60	// simpler. Do not try to mix them in the wrapping algorithm, as it can get
 61	// complicated quickly.
 62	line1, termEscapes := extractTermEscapes(line)
 63
 64	chunks := segmentLine(line1)
 65	// Reverse the chunk array so we can use it as a stack.
 66	for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
 67		chunks[i], chunks[j] = chunks[j], chunks[i]
 68	}
 69	var line2 string = ""
 70	var width int = 0
 71	for len(chunks) > 0 {
 72		thisWord := chunks[len(chunks)-1]
 73		wl := wordLen(thisWord)
 74		if width+wl <= textWidth {
 75			line2 += chunks[len(chunks)-1]
 76			chunks = chunks[:len(chunks)-1]
 77			width += wl
 78			if width == textWidth && len(chunks) > 0 {
 79				// NOTE: new line begins when current line is full and there are more
 80				// chunks to come.
 81				line2 += "\n"
 82				width = 0
 83			}
 84		} else if wl > textWidth {
 85			left, right := splitWord(chunks[len(chunks)-1], textWidth)
 86			line2 += left + "\n"
 87			chunks[len(chunks)-1] = right
 88			width = 0
 89		} else {
 90			line2 += "\n"
 91			width = 0
 92		}
 93	}
 94
 95	line3 := applyTermEscapes(line2, termEscapes)
 96	return line3
 97}
 98
// EscapeItem stores one terminal escape found in a line. 'item' is the actual
// escape command, and 'pos' is the index, in the rune array of the line
// without its escapes, where 'item' shall be inserted back. For example, the
// escape item in "F\x1b[33mox" is {"\x1b[33m", 1}.
type EscapeItem struct {
	// item is the raw escape sequence, from the ESC rune up to and including
	// the terminating 'm'.
	item string
	// pos is the number of non-escape runes that precede the sequence.
	pos int
}
107
108// Extract terminal escapes out of a line, returns a new line without terminal
109// escapes and a slice of escape items. The terminal escapes can be inserted
110// back into the new line at rune index 'item.pos' to recover the original line.
111//
112// Required: The line shall not contain "\n"
113//
114func extractTermEscapes(line string) (string, []EscapeItem) {
115	var termEscapes []EscapeItem
116	var line1 string
117
118	pos := 0
119	item := ""
120	occupiedRuneCount := 0
121	inEscape := false
122	for i, r := range []rune(line) {
123		if r == '\x1b' {
124			pos = i
125			item = string(r)
126			inEscape = true
127			continue
128		}
129		if inEscape {
130			item += string(r)
131			if r == 'm' {
132				termEscapes = append(termEscapes, EscapeItem{item, pos - occupiedRuneCount})
133				occupiedRuneCount += utf8.RuneCountInString(item)
134				inEscape = false
135			}
136			continue
137		}
138		line1 += string(r)
139	}
140
141	return line1, termEscapes
142}
143
144// Apply the extracted terminal escapes to the edited line. The only edit
145// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
146// this since this function is not able to check it.
147func applyTermEscapes(line string, escapes []EscapeItem) string {
148	if len(escapes) == 0 {
149		return line
150	}
151
152	var out string = ""
153
154	currPos := 0
155	currItem := 0
156	for _, r := range line {
157		if currItem < len(escapes) && currPos == escapes[currItem].pos {
158			// NOTE: We avoid terminal escapes at the end of a line by move them one
159			// pass the end of line, so that algorithms who trim right spaces are
160			// happy. But algorithms who trim left spaces are still unhappy.
161			if r == '\n' {
162				out += "\n" + escapes[currItem].item
163			} else {
164				out += escapes[currItem].item + string(r)
165				currPos++
166			}
167			currItem++
168		} else {
169			if r != '\n' {
170				currPos++
171			}
172			out += string(r)
173		}
174	}
175
176	return out
177}
178
179// Segment a line into chunks, where each chunk consists of chars with the same
180// type and is not breakable.
181func segmentLine(s string) []string {
182	var chunks []string
183
184	var word string
185	wordType := NONE
186	flushWord := func() {
187		chunks = append(chunks, word)
188		word = ""
189		wordType = NONE
190	}
191
192	for _, r := range s {
193		// A WIDE_CHAR itself constitutes a chunk.
194		thisType := runeType(r)
195		if thisType == WIDE_CHAR {
196			if wordType != NONE {
197				flushWord()
198			}
199			chunks = append(chunks, string(r))
200			continue
201		}
202		// Other type of chunks starts with a char of that type, and ends with a
203		// char with different type or end of string.
204		if thisType != wordType {
205			if wordType != NONE {
206				flushWord()
207			}
208			word = string(r)
209			wordType = thisType
210		} else {
211			word += string(r)
212		}
213	}
214	if word != "" {
215		flushWord()
216	}
217
218	return chunks
219}
220
// Rune categories, as returned by runeType.
//
// The categories are chosen so that a run of runes of one category forms a
// non-breakable chunk. They ARE NOT the unicode code point categories.
//
// NONE stands for "no category"; segmentLine uses it while no chunk is being
// built. iota counts the position inside the block, so WIDE_CHAR is 1 and the
// following names take the next values, up to VISIBLE_ASCII which is 5.
const (
	NONE      = -1
	WIDE_CHAR = iota
	INVISIBLE
	SHORT_UNICODE
	SPACE
	VISIBLE_ASCII
)
234
235// Determine the category of a rune.
236func runeType(r rune) int {
237	rw := runewidth.RuneWidth(r)
238	if rw > 1 {
239		return WIDE_CHAR
240	} else if rw == 0 {
241		return INVISIBLE
242	} else if r > 127 {
243		return SHORT_UNICODE
244	} else if r == ' ' {
245		return SPACE
246	} else {
247		return VISIBLE_ASCII
248	}
249}
250
251// wordLen return the length of a word, while ignoring the terminal escape
252// sequences
253func wordLen(word string) int {
254	length := 0
255	escape := false
256
257	for _, char := range word {
258		if char == '\x1b' {
259			escape = true
260		}
261		if !escape {
262			length += runewidth.RuneWidth(rune(char))
263		}
264		if char == 'm' {
265			escape = false
266		}
267	}
268
269	return length
270}
271
272// splitWord split a word at the given length, while ignoring the terminal escape sequences
273func splitWord(word string, length int) (string, string) {
274	runes := []rune(word)
275	var result []rune
276	added := 0
277	escape := false
278
279	if length == 0 {
280		return "", word
281	}
282
283	for _, r := range runes {
284		if r == '\x1b' {
285			escape = true
286		}
287
288		width := runewidth.RuneWidth(r)
289		if width+added > length {
290			// wide character made the length overflow
291			break
292		}
293
294		result = append(result, r)
295
296		if !escape {
297			added += width
298			if added >= length {
299				break
300			}
301		}
302
303		if r == 'm' {
304			escape = false
305		}
306	}
307
308	leftover := runes[len(result):]
309
310	return string(result), string(leftover)
311}
312
// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	if a <= b {
		return a
	}
	return b
}