truncate.go

  1package ansi
  2
  3import (
  4	"bytes"
  5
  6	"github.com/charmbracelet/x/ansi/parser"
  7	"github.com/mattn/go-runewidth"
  8	"github.com/rivo/uniseg"
  9)
 10
 11// Cut the string, without adding any prefix or tail strings. This function is
 12// aware of ANSI escape codes and will not break them, and accounts for
 13// wide-characters (such as East-Asian characters and emojis).
 14// This treats the text as a sequence of graphemes.
 15func Cut(s string, left, right int) string {
 16	return cut(GraphemeWidth, s, left, right)
 17}
 18
 19// CutWc the string, without adding any prefix or tail strings. This function is
 20// aware of ANSI escape codes and will not break them, and accounts for
 21// wide-characters (such as East-Asian characters and emojis).
 22// Note that the [left] parameter is inclusive, while [right] isn't,
 23// which is to say it'll return `[left, right)`.
 24//
 25// This treats the text as a sequence of wide characters and runes.
 26func CutWc(s string, left, right int) string {
 27	return cut(WcWidth, s, left, right)
 28}
 29
 30func cut(m Method, s string, left, right int) string {
 31	if right <= left {
 32		return ""
 33	}
 34
 35	truncate := Truncate
 36	truncateLeft := TruncateLeft
 37	if m == WcWidth {
 38		truncate = TruncateWc
 39		truncateLeft = TruncateWc
 40	}
 41
 42	if left == 0 {
 43		return truncate(s, right, "")
 44	}
 45	return truncateLeft(truncate(s, right, ""), left, "")
 46}
 47
 48// Truncate truncates a string to a given length, adding a tail to the end if
 49// the string is longer than the given length. This function is aware of ANSI
 50// escape codes and will not break them, and accounts for wide-characters (such
 51// as East-Asian characters and emojis).
 52// This treats the text as a sequence of graphemes.
 53func Truncate(s string, length int, tail string) string {
 54	return truncate(GraphemeWidth, s, length, tail)
 55}
 56
 57// TruncateWc truncates a string to a given length, adding a tail to the end if
 58// the string is longer than the given length. This function is aware of ANSI
 59// escape codes and will not break them, and accounts for wide-characters (such
 60// as East-Asian characters and emojis).
 61// This treats the text as a sequence of wide characters and runes.
 62func TruncateWc(s string, length int, tail string) string {
 63	return truncate(WcWidth, s, length, tail)
 64}
 65
 66func truncate(m Method, s string, length int, tail string) string {
 67	if sw := StringWidth(s); sw <= length {
 68		return s
 69	}
 70
 71	tw := StringWidth(tail)
 72	length -= tw
 73	if length < 0 {
 74		return ""
 75	}
 76
 77	var cluster []byte
 78	var buf bytes.Buffer
 79	curWidth := 0
 80	ignoring := false
 81	pstate := parser.GroundState // initial state
 82	b := []byte(s)
 83	i := 0
 84
 85	// Here we iterate over the bytes of the string and collect printable
 86	// characters and runes. We also keep track of the width of the string
 87	// in cells.
 88	//
 89	// Once we reach the given length, we start ignoring characters and only
 90	// collect ANSI escape codes until we reach the end of string.
 91	for i < len(b) {
 92		state, action := parser.Table.Transition(pstate, b[i])
 93		if state == parser.Utf8State {
 94			// This action happens when we transition to the Utf8State.
 95			var width int
 96			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
 97			if m == WcWidth {
 98				width = runewidth.StringWidth(string(cluster))
 99			}
100
101			// increment the index by the length of the cluster
102			i += len(cluster)
103			curWidth += width
104
105			// Are we ignoring? Skip to the next byte
106			if ignoring {
107				continue
108			}
109
110			// Is this gonna be too wide?
111			// If so write the tail and stop collecting.
112			if curWidth > length && !ignoring {
113				ignoring = true
114				buf.WriteString(tail)
115			}
116
117			if curWidth > length {
118				continue
119			}
120
121			buf.Write(cluster)
122
123			// Done collecting, now we're back in the ground state.
124			pstate = parser.GroundState
125			continue
126		}
127
128		switch action {
129		case parser.PrintAction:
130			// Is this gonna be too wide?
131			// If so write the tail and stop collecting.
132			if curWidth >= length && !ignoring {
133				ignoring = true
134				buf.WriteString(tail)
135			}
136
137			// Skip to the next byte if we're ignoring
138			if ignoring {
139				i++
140				continue
141			}
142
143			// collects printable ASCII
144			curWidth++
145			fallthrough
146		case parser.ExecuteAction:
147			// execute action will be things like \n, which, if outside the cut,
148			// should be ignored.
149			if ignoring {
150				i++
151				continue
152			}
153			fallthrough
154		default:
155			buf.WriteByte(b[i])
156			i++
157		}
158
159		// Transition to the next state.
160		pstate = state
161
162		// Once we reach the given length, we start ignoring runes and write
163		// the tail to the buffer.
164		if curWidth > length && !ignoring {
165			ignoring = true
166			buf.WriteString(tail)
167		}
168	}
169
170	return buf.String()
171}
172
173// TruncateLeft truncates a string from the left side by removing n characters,
174// adding a prefix to the beginning if the string is longer than n.
175// This function is aware of ANSI escape codes and will not break them, and
176// accounts for wide-characters (such as East-Asian characters and emojis).
177// This treats the text as a sequence of graphemes.
178func TruncateLeft(s string, n int, prefix string) string {
179	return truncateLeft(GraphemeWidth, s, n, prefix)
180}
181
182// TruncateLeftWc truncates a string from the left side by removing n characters,
183// adding a prefix to the beginning if the string is longer than n.
184// This function is aware of ANSI escape codes and will not break them, and
185// accounts for wide-characters (such as East-Asian characters and emojis).
186// This treats the text as a sequence of wide characters and runes.
187func TruncateLeftWc(s string, n int, prefix string) string {
188	return truncateLeft(WcWidth, s, n, prefix)
189}
190
191func truncateLeft(m Method, s string, n int, prefix string) string {
192	if n <= 0 {
193		return s
194	}
195
196	var cluster []byte
197	var buf bytes.Buffer
198	curWidth := 0
199	ignoring := true
200	pstate := parser.GroundState
201	b := []byte(s)
202	i := 0
203
204	for i < len(b) {
205		if !ignoring {
206			buf.Write(b[i:])
207			break
208		}
209
210		state, action := parser.Table.Transition(pstate, b[i])
211		if state == parser.Utf8State {
212			var width int
213			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
214			if m == WcWidth {
215				width = runewidth.StringWidth(string(cluster))
216			}
217
218			i += len(cluster)
219			curWidth += width
220
221			if curWidth > n && ignoring {
222				ignoring = false
223				buf.WriteString(prefix)
224			}
225
226			if curWidth > n {
227				buf.Write(cluster)
228			}
229
230			if ignoring {
231				continue
232			}
233
234			pstate = parser.GroundState
235			continue
236		}
237
238		switch action {
239		case parser.PrintAction:
240			curWidth++
241
242			if curWidth > n && ignoring {
243				ignoring = false
244				buf.WriteString(prefix)
245			}
246
247			if ignoring {
248				i++
249				continue
250			}
251
252			fallthrough
253		case parser.ExecuteAction:
254			// execute action will be things like \n, which, if outside the cut,
255			// should be ignored.
256			if ignoring {
257				i++
258				continue
259			}
260			fallthrough
261		default:
262			buf.WriteByte(b[i])
263			i++
264		}
265
266		pstate = state
267		if curWidth > n && ignoring {
268			ignoring = false
269			buf.WriteString(prefix)
270		}
271	}
272
273	return buf.String()
274}
275
276// ByteToGraphemeRange takes start and stop byte positions and converts them to
277// grapheme-aware char positions.
278// You can use this with [Truncate], [TruncateLeft], and [Cut].
279func ByteToGraphemeRange(str string, byteStart, byteStop int) (charStart, charStop int) {
280	bytePos, charPos := 0, 0
281	gr := uniseg.NewGraphemes(str)
282	for byteStart > bytePos {
283		if !gr.Next() {
284			break
285		}
286		bytePos += len(gr.Str())
287		charPos += max(1, gr.Width())
288	}
289	charStart = charPos
290	for byteStop > bytePos {
291		if !gr.Next() {
292			break
293		}
294		bytePos += len(gr.Str())
295		charPos += max(1, gr.Width())
296	}
297	charStop = charPos
298	return
299}