1package text
2
3import (
4 "github.com/mattn/go-runewidth"
5 "strings"
6 "unicode/utf8"
7)
8
// init forces runewidth not to treat ambiguous-width runes as wide chars, so
// that things like the unicode ellipsis and the up/down/left/right glyphs get
// the correct rune width and are displayed correctly in terminals.
//
// NOTE(review): this writes to runewidth.DefaultCondition, which looks like
// shared package-level state, so it presumably affects every other user of
// runewidth in the same program; confirm that this is intended.
func init() {
	runewidth.DefaultCondition.EastAsianWidth = false
}
15
16// Wrap a text for an exact line size
17// Handle properly terminal color escape code
18func Wrap(text string, lineWidth int) (string, int) {
19 return WrapLeftPadded(text, lineWidth, 0)
20}
21
22// Wrap a text for an exact line size with a left padding
23// Handle properly terminal color escape code
24func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
25 var lines []string
26 nbLine := 0
27 pad := strings.Repeat(" ", leftPad)
28
29 // tabs are formatted as 4 spaces
30 text = strings.Replace(text, "\t", " ", -1)
31 // NOTE: text is first segmented into lines so that softwrapLine can handle.
32 for _, line := range strings.Split(text, "\n") {
33 if line == "" || strings.TrimSpace(line) == "" {
34 lines = append(lines, "")
35 nbLine++
36 } else {
37 wrapped := softwrapLine(line, lineWidth-leftPad)
38 firstLine := true
39 for _, seg := range strings.Split(wrapped, "\n") {
40 if firstLine {
41 lines = append(lines, pad+strings.TrimRight(seg, " "))
42 firstLine = false
43 } else {
44 lines = append(lines, pad+strings.TrimSpace(seg))
45 }
46 nbLine++
47 }
48 }
49 }
50 return strings.Join(lines, "\n"), nbLine
51}
52
53// Break a line into several lines so that each line consumes at most
54// 'textWidth' cells. Lines break at groups of white spaces and multibyte
55// chars. Nothing is removed from the original text so that it behaves like a
56// softwrap.
57//
58// Required: The line shall not contain '\n'
59//
60// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
61// breaks ("\n") are inserted between these groups so that the total length
62// between breaks does not exceed the required width. Words that are longer than
63// the textWidth are broen into pieces no longer than textWidth.
64//
65func softwrapLine(line string, textWidth int) string {
66 // NOTE: terminal escapes are stripped out of the line so the algorithm is
67 // simpler. Do not try to mix them in the wrapping algorithm, as it can get
68 // complicated quickly.
69 line1, termEscapes := extractTermEscapes(line)
70
71 chunks := segmentLine(line1)
72 // Reverse the chunk array so we can use it as a stack.
73 for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
74 chunks[i], chunks[j] = chunks[j], chunks[i]
75 }
76 var line2 string = ""
77 var width int = 0
78 for len(chunks) > 0 {
79 thisWord := chunks[len(chunks)-1]
80 wl := wordLen(thisWord)
81 if width+wl <= textWidth {
82 line2 += chunks[len(chunks)-1]
83 chunks = chunks[:len(chunks)-1]
84 width += wl
85 if width == textWidth && len(chunks) > 0 {
86 // NOTE: new line begins when current line is full and there are more
87 // chunks to come.
88 line2 += "\n"
89 width = 0
90 }
91 } else if wl > textWidth {
92 // NOTE: By default, long words are splited to fill the remaining space.
93 // But if the long words is the first non-space word in the middle of the
94 // line, preceeding spaces shall not be counted in word spliting.
95 splitWidth := textWidth - width
96 if strings.HasSuffix(line2, "\n"+strings.Repeat(" ", width)) {
97 splitWidth += width
98 }
99 left, right := splitWord(chunks[len(chunks)-1], splitWidth)
100 chunks[len(chunks)-1] = right
101 line2 += left + "\n"
102 width = 0
103 } else {
104 line2 += "\n"
105 width = 0
106 }
107 }
108
109 line3 := applyTermEscapes(line2, termEscapes)
110 return line3
111}
112
// escapeItem is the storage of one terminal escape found in a line. 'item' is
// the actual escape command, and 'pos' is the index in the rune array (of the
// line once its escapes are removed) where the 'item' shall be inserted back.
// For example, the escape item in "F\x1b33mox" is {"\x1b33m", 1}.
type escapeItem struct {
	item string // the escape sequence, from the ESC rune to the closing 'm'
	pos  int    // rune index in the escape-free line where item belongs
}
121
122// Extract terminal escapes out of a line, returns a new line without terminal
123// escapes and a slice of escape items. The terminal escapes can be inserted
124// back into the new line at rune index 'item.pos' to recover the original line.
125//
126// Required: The line shall not contain "\n"
127//
128func extractTermEscapes(line string) (string, []escapeItem) {
129 var termEscapes []escapeItem
130 var line1 string
131
132 pos := 0
133 item := ""
134 occupiedRuneCount := 0
135 inEscape := false
136 for i, r := range []rune(line) {
137 if r == '\x1b' {
138 pos = i
139 item = string(r)
140 inEscape = true
141 continue
142 }
143 if inEscape {
144 item += string(r)
145 if r == 'm' {
146 termEscapes = append(termEscapes, escapeItem{item, pos - occupiedRuneCount})
147 occupiedRuneCount += utf8.RuneCountInString(item)
148 inEscape = false
149 }
150 continue
151 }
152 line1 += string(r)
153 }
154
155 return line1, termEscapes
156}
157
158// Apply the extracted terminal escapes to the edited line. The only edit
159// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
160// this since this function is not able to check it.
161func applyTermEscapes(line string, escapes []escapeItem) string {
162 if len(escapes) == 0 {
163 return line
164 }
165
166 var out string = ""
167
168 currPos := 0
169 currItem := 0
170 for _, r := range line {
171 if currItem < len(escapes) && currPos == escapes[currItem].pos {
172 // NOTE: We avoid terminal escapes at the end of a line by move them one
173 // pass the end of line, so that algorithms who trim right spaces are
174 // happy. But algorithms who trim left spaces are still unhappy.
175 if r == '\n' {
176 out += "\n" + escapes[currItem].item
177 } else {
178 out += escapes[currItem].item + string(r)
179 currPos++
180 }
181 currItem++
182 } else {
183 if r != '\n' {
184 currPos++
185 }
186 out += string(r)
187 }
188 }
189
190 return out
191}
192
193// Segment a line into chunks, where each chunk consists of chars with the same
194// type and is not breakable.
195func segmentLine(s string) []string {
196 var chunks []string
197
198 var word string
199 wordType := none
200 flushWord := func() {
201 chunks = append(chunks, word)
202 word = ""
203 wordType = none
204 }
205
206 for _, r := range s {
207 // A WIDE_CHAR itself constitutes a chunk.
208 thisType := runeType(r)
209 if thisType == wideChar {
210 if wordType != none {
211 flushWord()
212 }
213 chunks = append(chunks, string(r))
214 continue
215 }
216 // Other type of chunks starts with a char of that type, and ends with a
217 // char with different type or end of string.
218 if thisType != wordType {
219 if wordType != none {
220 flushWord()
221 }
222 word = string(r)
223 wordType = thisType
224 } else {
225 word += string(r)
226 }
227 }
228 if word != "" {
229 flushWord()
230 }
231
232 return chunks
233}
234
// Rune categories
//
// These categories are defined so that consecutive runes of the same category
// form a non-breakable chunk (see segmentLine, which also makes every wide
// char a chunk of its own). They ARE NOT the same as the unicode code point
// categories.
const (
	// none is the zero value: segmentLine uses it when no chunk is being
	// built. runeType never returns it.
	none int = iota
	// wideChar is a rune that is more than one cell wide.
	wideChar
	// invisible is a rune that is zero cells wide.
	invisible
	// shortUnicode is a one-cell-wide rune outside of the ASCII range.
	shortUnicode
	// space is the ASCII space character.
	space
	// visibleAscii is any other one-cell-wide ASCII rune.
	visibleAscii
)
248
249// Determine the category of a rune.
250func runeType(r rune) int {
251 rw := runewidth.RuneWidth(r)
252 if rw > 1 {
253 return wideChar
254 } else if rw == 0 {
255 return invisible
256 } else if r > 127 {
257 return shortUnicode
258 } else if r == ' ' {
259 return space
260 } else {
261 return visibleAscii
262 }
263}
264
265// wordLen return the length of a word, while ignoring the terminal escape
266// sequences
267func wordLen(word string) int {
268 length := 0
269 escape := false
270
271 for _, char := range word {
272 if char == '\x1b' {
273 escape = true
274 }
275 if !escape {
276 length += runewidth.RuneWidth(rune(char))
277 }
278 if char == 'm' {
279 escape = false
280 }
281 }
282
283 return length
284}
285
286// splitWord split a word at the given length, while ignoring the terminal escape sequences
287func splitWord(word string, length int) (string, string) {
288 runes := []rune(word)
289 var result []rune
290 added := 0
291 escape := false
292
293 if length == 0 {
294 return "", word
295 }
296
297 for _, r := range runes {
298 if r == '\x1b' {
299 escape = true
300 }
301
302 width := runewidth.RuneWidth(r)
303 if width+added > length {
304 // wide character made the length overflow
305 break
306 }
307
308 result = append(result, r)
309
310 if !escape {
311 added += width
312 if added >= length {
313 break
314 }
315 }
316
317 if r == 'm' {
318 escape = false
319 }
320 }
321
322 leftover := runes[len(result):]
323
324 return string(result), string(leftover)
325}
326
// minInt returns the smaller of two ints.
func minInt(a, b int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	return smallest
}