1package text
2
3import (
4 "github.com/mattn/go-runewidth"
5 "strings"
6 "unicode/utf8"
7)
8
9// Wrap a text for an exact line size
10// Handle properly terminal color escape code
11func Wrap(text string, lineWidth int) (string, int) {
12 return WrapLeftPadded(text, lineWidth, 0)
13}
14
15// Wrap a text for an exact line size with a left padding
16// Handle properly terminal color escape code
17func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
18 var lines []string
19 nbLine := 0
20 pad := strings.Repeat(" ", leftPad)
21
22 // tabs are formatted as 4 spaces
23 text = strings.Replace(text, "\t", " ", -1)
24 // NOTE: text is first segmented into lines so that softwrapLine can handle.
25 for _, line := range strings.Split(text, "\n") {
26 if line == "" || strings.TrimSpace(line) == "" {
27 lines = append(lines, "")
28 nbLine++
29 } else {
30 wrapped := softwrapLine(line, lineWidth-leftPad)
31 firstLine := true
32 for _, seg := range strings.Split(wrapped, "\n") {
33 if firstLine {
34 lines = append(lines, pad+strings.TrimRight(seg, " "))
35 firstLine = false
36 } else {
37 lines = append(lines, pad+strings.TrimSpace(seg))
38 }
39 nbLine++
40 }
41 }
42 }
43 return strings.Join(lines, "\n"), nbLine
44}
45
46// Break a line into several lines so that each line consumes at most
47// 'textWidth' cells. Lines break at groups of white spaces and multibyte
48// chars. Nothing is removed from the original text so that it behaves like a
49// softwrap.
50//
51// Required: The line shall not contain '\n'
52//
53// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
54// breaks ("\n") are inserted between these groups so that the total length
55// between breaks does not exceed the required width. Words that are longer than
56// the textWidth are broen into pieces no longer than textWidth.
57//
58func softwrapLine(line string, textWidth int) string {
59 // NOTE: terminal escapes are stripped out of the line so the algorithm is
60 // simpler. Do not try to mix them in the wrapping algorithm, as it can get
61 // complicated quickly.
62 line1, termEscapes := extractTermEscapes(line)
63
64 chunks := segmentLine(line1)
65 // Reverse the chunk array so we can use it as a stack.
66 for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
67 chunks[i], chunks[j] = chunks[j], chunks[i]
68 }
69 var line2 string = ""
70 var width int = 0
71 for len(chunks) > 0 {
72 thisWord := chunks[len(chunks)-1]
73 wl := wordLen(thisWord)
74 if width+wl <= textWidth {
75 line2 += chunks[len(chunks)-1]
76 chunks = chunks[:len(chunks)-1]
77 width += wl
78 if width == textWidth && len(chunks) > 0 {
79 // NOTE: new line begins when current line is full and there are more
80 // chunks to come.
81 line2 += "\n"
82 width = 0
83 }
84 } else if wl > textWidth {
85 left, right := splitWord(chunks[len(chunks)-1], textWidth)
86 line2 += left + "\n"
87 chunks[len(chunks)-1] = right
88 width = 0
89 } else {
90 line2 += "\n"
91 width = 0
92 }
93 }
94
95 line3 := applyTermEscapes(line2, termEscapes)
96 return line3
97}
98
// EscapeItem stores one terminal escape found in a line. 'item' is the actual
// escape command, and 'pos' is the index in the rune array of the line
// stripped of its escapes where the 'item' shall be inserted back. For
// example, the escape item in "F\x1b33mox" is {"\x1b33m", 1}.
type EscapeItem struct {
	// item is the raw escape sequence, from the leading '\x1b' up to and
	// including the terminating 'm'.
	item string
	// pos is the rune index, in the escape-free line, of the rune that
	// followed the escape in the original line.
	pos int
}
107
108// Extract terminal escapes out of a line, returns a new line without terminal
109// escapes and a slice of escape items. The terminal escapes can be inserted
110// back into the new line at rune index 'item.pos' to recover the original line.
111//
112// Required: The line shall not contain "\n"
113//
114func extractTermEscapes(line string) (string, []EscapeItem) {
115 var termEscapes []EscapeItem
116 var line1 string
117
118 pos := 0
119 item := ""
120 occupiedRuneCount := 0
121 inEscape := false
122 for i, r := range []rune(line) {
123 if r == '\x1b' {
124 pos = i
125 item = string(r)
126 inEscape = true
127 continue
128 }
129 if inEscape {
130 item += string(r)
131 if r == 'm' {
132 termEscapes = append(termEscapes, EscapeItem{item, pos - occupiedRuneCount})
133 occupiedRuneCount += utf8.RuneCountInString(item)
134 inEscape = false
135 }
136 continue
137 }
138 line1 += string(r)
139 }
140
141 return line1, termEscapes
142}
143
144// Apply the extracted terminal escapes to the edited line. The only edit
145// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
146// this since this function is not able to check it.
147func applyTermEscapes(line string, escapes []EscapeItem) string {
148 if len(escapes) == 0 {
149 return line
150 }
151
152 var out string = ""
153
154 currPos := 0
155 currItem := 0
156 for _, r := range line {
157 if currItem < len(escapes) && currPos == escapes[currItem].pos {
158 // NOTE: We avoid terminal escapes at the end of a line by move them one
159 // pass the end of line, so that algorithms who trim right spaces are
160 // happy. But algorithms who trim left spaces are still unhappy.
161 if r == '\n' {
162 out += "\n" + escapes[currItem].item
163 } else {
164 out += escapes[currItem].item + string(r)
165 currPos++
166 }
167 currItem++
168 } else {
169 if r != '\n' {
170 currPos++
171 }
172 out += string(r)
173 }
174 }
175
176 return out
177}
178
179// Segment a line into chunks, where each chunk consists of chars with the same
180// type and is not breakable.
181func segmentLine(s string) []string {
182 var chunks []string
183
184 var word string
185 wordType := NONE
186 flushWord := func() {
187 chunks = append(chunks, word)
188 word = ""
189 wordType = NONE
190 }
191
192 for _, r := range s {
193 // A WIDE_CHAR itself constitutes a chunk.
194 thisType := runeType(r)
195 if thisType == WIDE_CHAR {
196 if wordType != NONE {
197 flushWord()
198 }
199 chunks = append(chunks, string(r))
200 continue
201 }
202 // Other type of chunks starts with a char of that type, and ends with a
203 // char with different type or end of string.
204 if thisType != wordType {
205 if wordType != NONE {
206 flushWord()
207 }
208 word = string(r)
209 wordType = thisType
210 } else {
211 word += string(r)
212 }
213 }
214 if word != "" {
215 flushWord()
216 }
217
218 return chunks
219}
220
// Rune categories
//
// Each category gathers the runes that stay together in one non-breakable
// chunk when a line is segmented. They ARE NOT the unicode code point
// categories.
const (
	// NONE marks the absence of category: no chunk is being built.
	NONE = -1

	// NOTE: iota is already 1 here since NONE takes the first slot of the
	// block, so the categories are numbered from 1 to 5.

	// WIDE_CHAR is a rune displayed on more than one cell.
	WIDE_CHAR = iota
	// INVISIBLE is a rune displayed on zero cell.
	INVISIBLE
	// SHORT_UNICODE is a non-ASCII rune displayed on one cell.
	SHORT_UNICODE
	// SPACE is the ASCII space.
	SPACE
	// VISIBLE_ASCII is any other ASCII rune displayed on one cell.
	VISIBLE_ASCII
)
234
235// Determine the category of a rune.
236func runeType(r rune) int {
237 rw := runewidth.RuneWidth(r)
238 if rw > 1 {
239 return WIDE_CHAR
240 } else if rw == 0 {
241 return INVISIBLE
242 } else if r > 127 {
243 return SHORT_UNICODE
244 } else if r == ' ' {
245 return SPACE
246 } else {
247 return VISIBLE_ASCII
248 }
249}
250
251// wordLen return the length of a word, while ignoring the terminal escape
252// sequences
253func wordLen(word string) int {
254 length := 0
255 escape := false
256
257 for _, char := range word {
258 if char == '\x1b' {
259 escape = true
260 }
261 if !escape {
262 length += runewidth.RuneWidth(rune(char))
263 }
264 if char == 'm' {
265 escape = false
266 }
267 }
268
269 return length
270}
271
272// splitWord split a word at the given length, while ignoring the terminal escape sequences
273func splitWord(word string, length int) (string, string) {
274 runes := []rune(word)
275 var result []rune
276 added := 0
277 escape := false
278
279 if length == 0 {
280 return "", word
281 }
282
283 for _, r := range runes {
284 if r == '\x1b' {
285 escape = true
286 }
287
288 width := runewidth.RuneWidth(r)
289 if width+added > length {
290 // wide character made the length overflow
291 break
292 }
293
294 result = append(result, r)
295
296 if !escape {
297 added += width
298 if added >= length {
299 break
300 }
301 }
302
303 if r == 'm' {
304 escape = false
305 }
306 }
307
308 leftover := runes[len(result):]
309
310 return string(result), string(leftover)
311}
312
// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	return smallest
}