cases.go

  1// Copyright 2014 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5//go:generate go run gen.go gen_trieval.go
  6
  7// Package cases provides general and language-specific case mappers.
  8package cases // import "golang.org/x/text/cases"
  9
 10import (
 11	"golang.org/x/text/language"
 12	"golang.org/x/text/transform"
 13)
 14
 15// References:
// - The Unicode Standard, Sections 3.13, 4.2, and 5.18.
 17// - https://www.unicode.org/reports/tr29/
 18// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
 19// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
 20// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
 21// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
 22// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - https://unicode-org.github.io/icu/userguide/transforms/casemappings.html
 24
 25// TODO:
 26// - Case folding
 27// - Wide and Narrow?
 28// - Segmenter option for title casing.
 29// - ASCII fast paths
 30// - Encode Soft-Dotted property within trie somehow.
 31
 32// A Caser transforms given input to a certain case. It implements
 33// transform.Transformer.
 34//
 35// A Caser may be stateful and should therefore not be shared between
 36// goroutines.
 37type Caser struct {
 38	t transform.SpanningTransformer
 39}
 40
 41// Bytes returns a new byte slice with the result of converting b to the case
 42// form implemented by c.
 43func (c Caser) Bytes(b []byte) []byte {
 44	b, _, _ = transform.Bytes(c.t, b)
 45	return b
 46}
 47
 48// String returns a string with the result of transforming s to the case form
 49// implemented by c.
 50func (c Caser) String(s string) string {
 51	s, _, _ = transform.String(c.t, s)
 52	return s
 53}
 54
 55// Reset resets the Caser to be reused for new input after a previous call to
 56// Transform.
 57func (c Caser) Reset() { c.t.Reset() }
 58
 59// Transform implements the transform.Transformer interface and transforms the
 60// given input to the case form implemented by c.
 61func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
 62	return c.t.Transform(dst, src, atEOF)
 63}
 64
 65// Span implements the transform.SpanningTransformer interface.
 66func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
 67	return c.t.Span(src, atEOF)
 68}
 69
 70// Upper returns a Caser for language-specific uppercasing.
 71func Upper(t language.Tag, opts ...Option) Caser {
 72	return Caser{makeUpper(t, getOpts(opts...))}
 73}
 74
 75// Lower returns a Caser for language-specific lowercasing.
 76func Lower(t language.Tag, opts ...Option) Caser {
 77	return Caser{makeLower(t, getOpts(opts...))}
 78}
 79
 80// Title returns a Caser for language-specific title casing. It uses an
 81// approximation of the default Unicode Word Break algorithm.
 82func Title(t language.Tag, opts ...Option) Caser {
 83	return Caser{makeTitle(t, getOpts(opts...))}
 84}
 85
 86// Fold returns a Caser that implements Unicode case folding. The returned Caser
 87// is stateless and safe to use concurrently by multiple goroutines.
 88//
 89// Case folding does not normalize the input and may not preserve a normal form.
 90// Use the collate or search package for more convenient and linguistically
 91// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
 92// where security aspects are a concern.
 93func Fold(opts ...Option) Caser {
 94	return Caser{makeFold(getOpts(opts...))}
 95}
 96
 97// An Option is used to modify the behavior of a Caser.
 98type Option func(o options) options
 99
// TODO: consider having these options take a boolean as well, like
// HandleFinalSigma. The advantage of using this approach is that other
// providers of a lower-case algorithm could set different defaults by
// prefixing a user-provided slice of options with their own. This is handy,
// for instance, for the precis package, which would override the default to
// not handle the Greek final sigma.
105
106var (
107	// NoLower disables the lowercasing of non-leading letters for a title
108	// caser.
109	NoLower Option = noLower
110
111	// Compact omits mappings in case folding for characters that would grow the
112	// input. (Unimplemented.)
113	Compact Option = compact
114)
115
116// TODO: option to preserve a normal form, if applicable?
117
// options holds the settings that Option functions apply when a Caser is
// constructed. The zero value is the default configuration.
type options struct {
	// noLower is set by the noLower option function; simple is set by the
	// compact option function.
	noLower, simple bool

	// TODO: segmenter, max ignorable, alternative versions, etc.

	// ignoreFinalSigma is set by ignoreFinalSigma and cleared by
	// handleFinalSigma, the two option functions HandleFinalSigma returns.
	ignoreFinalSigma bool
}
126
127func getOpts(o ...Option) (res options) {
128	for _, f := range o {
129		res = f(res)
130	}
131	return
132}
133
134func noLower(o options) options {
135	o.noLower = true
136	return o
137}
138
139func compact(o options) options {
140	o.simple = true
141	return o
142}
143
144// HandleFinalSigma specifies whether the special handling of Greek final sigma
145// should be enabled. Unicode prescribes handling the Greek final sigma for all
146// locales, but standards like IDNA and PRECIS override this default.
147func HandleFinalSigma(enable bool) Option {
148	if enable {
149		return handleFinalSigma
150	}
151	return ignoreFinalSigma
152}
153
154func ignoreFinalSigma(o options) options {
155	o.ignoreFinalSigma = true
156	return o
157}
158
159func handleFinalSigma(o options) options {
160	o.ignoreFinalSigma = false
161	return o
162}