runewidth.go

  1package runewidth
  2
  3import (
  4	"os"
  5	"strings"
  6
  7	"github.com/rivo/uniseg"
  8)
  9
 10//go:generate go run script/generate.go
 11
 12var (
 13	// EastAsianWidth will be set true if the current locale is CJK
 14	EastAsianWidth bool
 15
 16	// StrictEmojiNeutral should be set false if handle broken fonts
 17	StrictEmojiNeutral bool = true
 18
 19	// DefaultCondition is a condition in current locale
 20	DefaultCondition = &Condition{
 21		EastAsianWidth:     false,
 22		StrictEmojiNeutral: true,
 23	}
 24)
 25
// init runs handleEnv once at package load so that EastAsianWidth and
// DefaultCondition reflect the environment before any width is computed.
func init() {
	handleEnv()
}
 29
 30func handleEnv() {
 31	env := os.Getenv("RUNEWIDTH_EASTASIAN")
 32	if env == "" {
 33		EastAsianWidth = IsEastAsian()
 34	} else {
 35		EastAsianWidth = env == "1"
 36	}
 37	// update DefaultCondition
 38	if DefaultCondition.EastAsianWidth != EastAsianWidth {
 39		DefaultCondition.EastAsianWidth = EastAsianWidth
 40		if len(DefaultCondition.combinedLut) > 0 {
 41			DefaultCondition.combinedLut = DefaultCondition.combinedLut[:0]
 42			CreateLUT()
 43		}
 44	}
 45}
 46
// interval is a range of code points. inTable treats both ends as inclusive.
type interval struct {
	first rune
	last  rune
}

// table is a list of intervals. inTable binary-searches it, so the intervals
// must be sorted in ascending order and must not overlap.
type table []interval
 53
 54func inTables(r rune, ts ...table) bool {
 55	for _, t := range ts {
 56		if inTable(r, t) {
 57			return true
 58		}
 59	}
 60	return false
 61}
 62
 63func inTable(r rune, t table) bool {
 64	if r < t[0].first {
 65		return false
 66	}
 67
 68	bot := 0
 69	top := len(t) - 1
 70	for top >= bot {
 71		mid := (bot + top) >> 1
 72
 73		switch {
 74		case t[mid].last < r:
 75			bot = mid + 1
 76		case t[mid].first > r:
 77			top = mid - 1
 78		default:
 79			return true
 80		}
 81	}
 82
 83	return false
 84}
 85
// private lists the code point ranges that IsAmbiguousWidth checks in
// addition to the ambiguous table. The entries are in ascending order, which
// the binary search in inTable relies on.
// NOTE(review): the ranges look like the Unicode Private Use Areas — confirm
// before editing.
var private = table{
	{0x00E000, 0x00F8FF}, {0x0F0000, 0x0FFFFD}, {0x100000, 0x10FFFD},
}
 89
// nonprint lists code point ranges that RuneWidth consults, together with
// the combining table, when deciding whether a rune occupies zero cells.
// The entries are in ascending order, which the binary search in inTable
// relies on.
var nonprint = table{
	{0x0000, 0x001F}, {0x007F, 0x009F}, {0x00AD, 0x00AD},
	{0x070F, 0x070F}, {0x180B, 0x180E}, {0x200B, 0x200F},
	{0x2028, 0x202E}, {0x206A, 0x206F}, {0xD800, 0xDFFF},
	{0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0xFFFE, 0xFFFF},
}
 96
// Condition holds the settings that determine how wide runes and strings are
// reported to be.
//
// EastAsianWidth selects the East Asian branch of RuneWidth, in which runes
// from the ambiguous table count as two cells. StrictEmojiNeutral only has an
// effect in that branch: when it is false, runes from the emoji table count
// as two cells as well. combinedLut is the optional lookup table built by
// CreateLUT; while it is non-empty RuneWidth answers from it directly. Each
// of its bytes stores the widths of two consecutive runes, one per 4-bit
// half.
type Condition struct {
	combinedLut        []byte
	EastAsianWidth     bool
	StrictEmojiNeutral bool
}
103
104// NewCondition return new instance of Condition which is current locale.
105func NewCondition() *Condition {
106	return &Condition{
107		EastAsianWidth:     EastAsianWidth,
108		StrictEmojiNeutral: StrictEmojiNeutral,
109	}
110}
111
112// RuneWidth returns the number of cells in r.
113// See http://www.unicode.org/reports/tr11/
114func (c *Condition) RuneWidth(r rune) int {
115	if r < 0 || r > 0x10FFFF {
116		return 0
117	}
118	if len(c.combinedLut) > 0 {
119		return int(c.combinedLut[r>>1]>>(uint(r&1)*4)) & 3
120	}
121	// optimized version, verified by TestRuneWidthChecksums()
122	if !c.EastAsianWidth {
123		switch {
124		case r < 0x20:
125			return 0
126		case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
127			return 0
128		case r < 0x300:
129			return 1
130		case inTable(r, narrow):
131			return 1
132		case inTables(r, nonprint, combining):
133			return 0
134		case inTable(r, doublewidth):
135			return 2
136		default:
137			return 1
138		}
139	} else {
140		switch {
141		case inTables(r, nonprint, combining):
142			return 0
143		case inTable(r, narrow):
144			return 1
145		case inTables(r, ambiguous, doublewidth):
146			return 2
147		case !c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow):
148			return 2
149		default:
150			return 1
151		}
152	}
153}
154
155// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
156// This should not be called concurrently with other operations on c.
157// If options in c is changed, CreateLUT should be called again.
158func (c *Condition) CreateLUT() {
159	const max = 0x110000
160	lut := c.combinedLut
161	if len(c.combinedLut) != 0 {
162		// Remove so we don't use it.
163		c.combinedLut = nil
164	} else {
165		lut = make([]byte, max/2)
166	}
167	for i := range lut {
168		i32 := int32(i * 2)
169		x0 := c.RuneWidth(i32)
170		x1 := c.RuneWidth(i32 + 1)
171		lut[i] = uint8(x0) | uint8(x1)<<4
172	}
173	c.combinedLut = lut
174}
175
176// StringWidth return width as you can see
177func (c *Condition) StringWidth(s string) (width int) {
178	g := uniseg.NewGraphemes(s)
179	for g.Next() {
180		var chWidth int
181		for _, r := range g.Runes() {
182			chWidth = c.RuneWidth(r)
183			if chWidth > 0 {
184				break // Our best guess at this point is to use the width of the first non-zero-width rune.
185			}
186		}
187		width += chWidth
188	}
189	return
190}
191
192// Truncate return string truncated with w cells
193func (c *Condition) Truncate(s string, w int, tail string) string {
194	if c.StringWidth(s) <= w {
195		return s
196	}
197	w -= c.StringWidth(tail)
198	var width int
199	pos := len(s)
200	g := uniseg.NewGraphemes(s)
201	for g.Next() {
202		var chWidth int
203		for _, r := range g.Runes() {
204			chWidth = c.RuneWidth(r)
205			if chWidth > 0 {
206				break // See StringWidth() for details.
207			}
208		}
209		if width+chWidth > w {
210			pos, _ = g.Positions()
211			break
212		}
213		width += chWidth
214	}
215	return s[:pos] + tail
216}
217
218// TruncateLeft cuts w cells from the beginning of the `s`.
219func (c *Condition) TruncateLeft(s string, w int, prefix string) string {
220	if c.StringWidth(s) <= w {
221		return prefix
222	}
223
224	var width int
225	pos := len(s)
226
227	g := uniseg.NewGraphemes(s)
228	for g.Next() {
229		var chWidth int
230		for _, r := range g.Runes() {
231			chWidth = c.RuneWidth(r)
232			if chWidth > 0 {
233				break // See StringWidth() for details.
234			}
235		}
236
237		if width+chWidth > w {
238			if width < w {
239				_, pos = g.Positions()
240				prefix += strings.Repeat(" ", width+chWidth-w)
241			} else {
242				pos, _ = g.Positions()
243			}
244
245			break
246		}
247
248		width += chWidth
249	}
250
251	return prefix + s[pos:]
252}
253
254// Wrap return string wrapped with w cells
255func (c *Condition) Wrap(s string, w int) string {
256	width := 0
257	out := ""
258	for _, r := range s {
259		cw := c.RuneWidth(r)
260		if r == '\n' {
261			out += string(r)
262			width = 0
263			continue
264		} else if width+cw > w {
265			out += "\n"
266			width = 0
267			out += string(r)
268			width += cw
269			continue
270		}
271		out += string(r)
272		width += cw
273	}
274	return out
275}
276
277// FillLeft return string filled in left by spaces in w cells
278func (c *Condition) FillLeft(s string, w int) string {
279	width := c.StringWidth(s)
280	count := w - width
281	if count > 0 {
282		b := make([]byte, count)
283		for i := range b {
284			b[i] = ' '
285		}
286		return string(b) + s
287	}
288	return s
289}
290
291// FillRight return string filled in left by spaces in w cells
292func (c *Condition) FillRight(s string, w int) string {
293	width := c.StringWidth(s)
294	count := w - width
295	if count > 0 {
296		b := make([]byte, count)
297		for i := range b {
298			b[i] = ' '
299		}
300		return s + string(b)
301	}
302	return s
303}
304
// RuneWidth returns the number of cells in r, evaluated with
// DefaultCondition.
// See http://www.unicode.org/reports/tr11/
func RuneWidth(r rune) int {
	return DefaultCondition.RuneWidth(r)
}
310
// IsAmbiguousWidth reports whether r is listed in the private or the
// ambiguous table, i.e. whether its width is treated as ambiguous.
func IsAmbiguousWidth(r rune) bool {
	return inTables(r, private, ambiguous)
}
315
// IsNeutralWidth reports whether r is listed in the neutral table, i.e.
// whether its width is treated as neutral.
func IsNeutralWidth(r rune) bool {
	return inTable(r, neutral)
}
320
// StringWidth returns the number of cells s occupies, evaluated with
// DefaultCondition.
func StringWidth(s string) (width int) {
	return DefaultCondition.StringWidth(s)
}
325
// Truncate returns s truncated to w cells with tail appended, evaluated with
// DefaultCondition. A string that already fits in w cells is returned
// unchanged.
func Truncate(s string, w int, tail string) string {
	return DefaultCondition.Truncate(s, w, tail)
}
330
// TruncateLeft cuts w cells from the beginning of `s` and returns prefix
// followed by the remainder, evaluated with DefaultCondition.
func TruncateLeft(s string, w int, prefix string) string {
	return DefaultCondition.TruncateLeft(s, w, prefix)
}
335
// Wrap returns s with newlines inserted so that no line is wider than w
// cells, evaluated with DefaultCondition.
func Wrap(s string, w int) string {
	return DefaultCondition.Wrap(s, w)
}
340
// FillLeft returns s padded with spaces on the left so that it occupies w
// cells, evaluated with DefaultCondition.
func FillLeft(s string, w int) string {
	return DefaultCondition.FillLeft(s, w)
}
345
// FillRight returns s padded with spaces on the right so that it occupies w
// cells, evaluated with DefaultCondition.
func FillRight(s string, w int) string {
	return DefaultCondition.FillRight(s, w)
}
350
351// CreateLUT will create an in-memory lookup table of 557055 bytes for faster operation.
352// This should not be called concurrently with other operations.
353func CreateLUT() {
354	if len(DefaultCondition.combinedLut) > 0 {
355		return
356	}
357	DefaultCondition.CreateLUT()
358}