1package ansi
2
3import (
4 "bytes"
5
6 "github.com/charmbracelet/x/ansi/parser"
7 "github.com/mattn/go-runewidth"
8 "github.com/rivo/uniseg"
9)
10
11// Cut the string, without adding any prefix or tail strings. This function is
12// aware of ANSI escape codes and will not break them, and accounts for
13// wide-characters (such as East-Asian characters and emojis).
14// This treats the text as a sequence of graphemes.
15func Cut(s string, left, right int) string {
16 return cut(GraphemeWidth, s, left, right)
17}
18
19// CutWc the string, without adding any prefix or tail strings. This function is
20// aware of ANSI escape codes and will not break them, and accounts for
21// wide-characters (such as East-Asian characters and emojis).
22// Note that the [left] parameter is inclusive, while [right] isn't,
23// which is to say it'll return `[left, right)`.
24//
25// This treats the text as a sequence of wide characters and runes.
26func CutWc(s string, left, right int) string {
27 return cut(WcWidth, s, left, right)
28}
29
30func cut(m Method, s string, left, right int) string {
31 if right <= left {
32 return ""
33 }
34
35 truncate := Truncate
36 truncateLeft := TruncateLeft
37 if m == WcWidth {
38 truncate = TruncateWc
39 truncateLeft = TruncateWc
40 }
41
42 if left == 0 {
43 return truncate(s, right, "")
44 }
45 return truncateLeft(truncate(s, right, ""), left, "")
46}
47
48// Truncate truncates a string to a given length, adding a tail to the end if
49// the string is longer than the given length. This function is aware of ANSI
50// escape codes and will not break them, and accounts for wide-characters (such
51// as East-Asian characters and emojis).
52// This treats the text as a sequence of graphemes.
53func Truncate(s string, length int, tail string) string {
54 return truncate(GraphemeWidth, s, length, tail)
55}
56
57// TruncateWc truncates a string to a given length, adding a tail to the end if
58// the string is longer than the given length. This function is aware of ANSI
59// escape codes and will not break them, and accounts for wide-characters (such
60// as East-Asian characters and emojis).
61// This treats the text as a sequence of wide characters and runes.
62func TruncateWc(s string, length int, tail string) string {
63 return truncate(WcWidth, s, length, tail)
64}
65
66func truncate(m Method, s string, length int, tail string) string {
67 if sw := StringWidth(s); sw <= length {
68 return s
69 }
70
71 tw := StringWidth(tail)
72 length -= tw
73 if length < 0 {
74 return ""
75 }
76
77 var cluster []byte
78 var buf bytes.Buffer
79 curWidth := 0
80 ignoring := false
81 pstate := parser.GroundState // initial state
82 b := []byte(s)
83 i := 0
84
85 // Here we iterate over the bytes of the string and collect printable
86 // characters and runes. We also keep track of the width of the string
87 // in cells.
88 //
89 // Once we reach the given length, we start ignoring characters and only
90 // collect ANSI escape codes until we reach the end of string.
91 for i < len(b) {
92 state, action := parser.Table.Transition(pstate, b[i])
93 if state == parser.Utf8State {
94 // This action happens when we transition to the Utf8State.
95 var width int
96 cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
97 if m == WcWidth {
98 width = runewidth.StringWidth(string(cluster))
99 }
100
101 // increment the index by the length of the cluster
102 i += len(cluster)
103 curWidth += width
104
105 // Are we ignoring? Skip to the next byte
106 if ignoring {
107 continue
108 }
109
110 // Is this gonna be too wide?
111 // If so write the tail and stop collecting.
112 if curWidth > length && !ignoring {
113 ignoring = true
114 buf.WriteString(tail)
115 }
116
117 if curWidth > length {
118 continue
119 }
120
121 buf.Write(cluster)
122
123 // Done collecting, now we're back in the ground state.
124 pstate = parser.GroundState
125 continue
126 }
127
128 switch action {
129 case parser.PrintAction:
130 // Is this gonna be too wide?
131 // If so write the tail and stop collecting.
132 if curWidth >= length && !ignoring {
133 ignoring = true
134 buf.WriteString(tail)
135 }
136
137 // Skip to the next byte if we're ignoring
138 if ignoring {
139 i++
140 continue
141 }
142
143 // collects printable ASCII
144 curWidth++
145 fallthrough
146 case parser.ExecuteAction:
147 // execute action will be things like \n, which, if outside the cut,
148 // should be ignored.
149 if ignoring {
150 i++
151 continue
152 }
153 fallthrough
154 default:
155 buf.WriteByte(b[i])
156 i++
157 }
158
159 // Transition to the next state.
160 pstate = state
161
162 // Once we reach the given length, we start ignoring runes and write
163 // the tail to the buffer.
164 if curWidth > length && !ignoring {
165 ignoring = true
166 buf.WriteString(tail)
167 }
168 }
169
170 return buf.String()
171}
172
173// TruncateLeft truncates a string from the left side by removing n characters,
174// adding a prefix to the beginning if the string is longer than n.
175// This function is aware of ANSI escape codes and will not break them, and
176// accounts for wide-characters (such as East-Asian characters and emojis).
177// This treats the text as a sequence of graphemes.
178func TruncateLeft(s string, n int, prefix string) string {
179 return truncateLeft(GraphemeWidth, s, n, prefix)
180}
181
182// TruncateLeftWc truncates a string from the left side by removing n characters,
183// adding a prefix to the beginning if the string is longer than n.
184// This function is aware of ANSI escape codes and will not break them, and
185// accounts for wide-characters (such as East-Asian characters and emojis).
186// This treats the text as a sequence of wide characters and runes.
187func TruncateLeftWc(s string, n int, prefix string) string {
188 return truncateLeft(WcWidth, s, n, prefix)
189}
190
191func truncateLeft(m Method, s string, n int, prefix string) string {
192 if n <= 0 {
193 return s
194 }
195
196 var cluster []byte
197 var buf bytes.Buffer
198 curWidth := 0
199 ignoring := true
200 pstate := parser.GroundState
201 b := []byte(s)
202 i := 0
203
204 for i < len(b) {
205 if !ignoring {
206 buf.Write(b[i:])
207 break
208 }
209
210 state, action := parser.Table.Transition(pstate, b[i])
211 if state == parser.Utf8State {
212 var width int
213 cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
214 if m == WcWidth {
215 width = runewidth.StringWidth(string(cluster))
216 }
217
218 i += len(cluster)
219 curWidth += width
220
221 if curWidth > n && ignoring {
222 ignoring = false
223 buf.WriteString(prefix)
224 }
225
226 if curWidth > n {
227 buf.Write(cluster)
228 }
229
230 if ignoring {
231 continue
232 }
233
234 pstate = parser.GroundState
235 continue
236 }
237
238 switch action {
239 case parser.PrintAction:
240 curWidth++
241
242 if curWidth > n && ignoring {
243 ignoring = false
244 buf.WriteString(prefix)
245 }
246
247 if ignoring {
248 i++
249 continue
250 }
251
252 fallthrough
253 case parser.ExecuteAction:
254 // execute action will be things like \n, which, if outside the cut,
255 // should be ignored.
256 if ignoring {
257 i++
258 continue
259 }
260 fallthrough
261 default:
262 buf.WriteByte(b[i])
263 i++
264 }
265
266 pstate = state
267 if curWidth > n && ignoring {
268 ignoring = false
269 buf.WriteString(prefix)
270 }
271 }
272
273 return buf.String()
274}
275
276// ByteToGraphemeRange takes start and stop byte positions and converts them to
277// grapheme-aware char positions.
278// You can use this with [Truncate], [TruncateLeft], and [Cut].
279func ByteToGraphemeRange(str string, byteStart, byteStop int) (charStart, charStop int) {
280 bytePos, charPos := 0, 0
281 gr := uniseg.NewGraphemes(str)
282 for byteStart > bytePos {
283 if !gr.Next() {
284 break
285 }
286 bytePos += len(gr.Str())
287 charPos += max(1, gr.Width())
288 }
289 charStart = charPos
290 for byteStop > bytePos {
291 if !gr.Next() {
292 break
293 }
294 bytePos += len(gr.Str())
295 charPos += max(1, gr.Width())
296 }
297 charStop = charPos
298 return
299}