map.go

  1// Copyright 2014 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package cases
  6
  7// This file contains the definitions of case mappings for all supported
  8// languages. The rules for the language-specific tailorings were taken and
  9// modified from the CLDR transform definitions in common/transforms.
 10
 11import (
 12	"strings"
 13	"unicode"
 14	"unicode/utf8"
 15
 16	"golang.org/x/text/internal"
 17	"golang.org/x/text/language"
 18	"golang.org/x/text/transform"
 19	"golang.org/x/text/unicode/norm"
 20)
 21
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
//
// Most Transform loops in this file stop consuming input as soon as a mapFunc
// returns false.
type mapFunc func(*context) bool

// A spanFunc takes a context set to the current rune and inspects whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible; the Span
// methods in this file keep scanning only while it returns true.
type spanFunc func(*context) bool
 32
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30

// supported lists the language tags for which we have tailorings, separated by
// single spaces. The per-language tables upperFunc, lowerFunc and titleInfos
// are annotated in this same order and are indexed with the position returned
// by matcher.Match, so the tag order here presumably has to stay in sync with
// those tables.
const supported = "und af az el lt nl tr"
 39
 40func init() {
 41	tags := []language.Tag{}
 42	for _, s := range strings.Split(supported, " ") {
 43		tags = append(tags, language.MustParse(s))
 44	}
 45	matcher = internal.NewInheritanceMatcher(tags)
 46	Supported = language.NewCoverage(tags)
 47}
 48
var (
	// matcher resolves a language tag to the index of a supported tailoring.
	// It is assigned in init.
	matcher *internal.InheritanceMatcher

	// Supported is the coverage built in init from the tags listed in
	// supported, i.e. the languages for which we have tailorings.
	Supported language.Coverage

	// We keep the following lists separate, instead of having a single per-
	// language struct, to give the compiler a chance to remove unused code.

	// Some uppercase mappers are stateless, so we can precompute the
	// Transformers and save a bit on runtime allocations.
	//
	// upperFunc holds, per language, the tailored upper-case mapper and the
	// matching span function. A nil upper entry makes makeUpper fall back to
	// undUpper.
	upperFunc = []struct {
		upper mapFunc
		span  spanFunc
	}{
		{nil, nil},                  // und
		{nil, nil},                  // af
		{aztrUpper(upper), isUpper}, // az
		{elUpper, noSpan},           // el
		{ltUpper(upper), noSpan},    // lt
		{nil, nil},                  // nl
		{aztrUpper(upper), isUpper}, // tr
	}

	// Precomputed transformers for the root locale (und). They are shared by
	// every language that has no tailoring of its own.
	undUpper            transform.SpanningTransformer = &undUpperCaser{}
	undLower            transform.SpanningTransformer = &undLowerCaser{}
	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}

	// lowerFunc holds the tailored lower-case mapper per language. A nil entry
	// makes makeLower fall back to undLower or undLowerIgnoreSigma.
	lowerFunc = []mapFunc{
		nil,       // und
		nil,       // af
		aztrLower, // az
		nil,       // el
		ltLower,   // lt
		nil,       // nl
		aztrLower, // tr
	}

	// titleInfos holds, per language, the functions used by titleCaser: the
	// mapper for the first cased rune of a word (title), the mapper for the
	// remaining runes (lower), the span function for the first cased rune
	// (titleSpan) and an optional hook that is run on every rune (rewrite).
	titleInfos = []struct {
		title     mapFunc
		lower     mapFunc
		titleSpan spanFunc
		rewrite   func(*context)
	}{
		{title, lower, isTitle, nil},                // und
		{title, lower, isTitle, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, isTitle, nil}, // az
		{title, lower, isTitle, nil},                // el
		{ltUpper(title), ltLower, noSpan, nil},      // lt
		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
		{aztrUpper(title), aztrLower, isTitle, nil}, // tr
	}
)
101
102func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
103	_, i, _ := matcher.Match(t)
104	f := upperFunc[i].upper
105	if f == nil {
106		return undUpper
107	}
108	return &simpleCaser{f: f, span: upperFunc[i].span}
109}
110
111func makeLower(t language.Tag, o options) transform.SpanningTransformer {
112	_, i, _ := matcher.Match(t)
113	f := lowerFunc[i]
114	if f == nil {
115		if o.ignoreFinalSigma {
116			return undLowerIgnoreSigma
117		}
118		return undLower
119	}
120	if o.ignoreFinalSigma {
121		return &simpleCaser{f: f, span: isLower}
122	}
123	return &lowerCaser{
124		first:   f,
125		midWord: finalSigma(f),
126	}
127}
128
129func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
130	_, i, _ := matcher.Match(t)
131	x := &titleInfos[i]
132	lower := x.lower
133	if o.noLower {
134		lower = (*context).copy
135	} else if !o.ignoreFinalSigma {
136		lower = finalSigma(lower)
137	}
138	return &titleCaser{
139		title:     x.title,
140		lower:     lower,
141		titleSpan: x.titleSpan,
142		rewrite:   x.rewrite,
143	}
144}
145
146func noSpan(c *context) bool {
147	c.err = transform.ErrEndOfSpan
148	return false
149}
150
151// TODO: consider a similar special case for the fast majority lower case. This
152// is a bit more involved so will require some more precise benchmarking to
153// justify it.
154
155type undUpperCaser struct{ transform.NopResetter }
156
157// undUpperCaser implements the Transformer interface for doing an upper case
158// mapping for the root locale (und). It eliminates the need for an allocation
159// as it prevents escaping by not using function pointers.
160func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
161	c := context{dst: dst, src: src, atEOF: atEOF}
162	for c.next() {
163		upper(&c)
164		c.checkpoint()
165	}
166	return c.ret()
167}
168
169func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
170	c := context{src: src, atEOF: atEOF}
171	for c.next() && isUpper(&c) {
172		c.checkpoint()
173	}
174	return c.retSpan()
175}
176
177// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
178// a lower case mapping for the root locale (und) ignoring final sigma
179// handling. This casing algorithm is used in some performance-critical packages
180// like secure/precis and x/net/http/idna, which warrants its special-casing.
181type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
182
183func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
184	c := context{dst: dst, src: src, atEOF: atEOF}
185	for c.next() && lower(&c) {
186		c.checkpoint()
187	}
188	return c.ret()
189
190}
191
192// Span implements a generic lower-casing. This is possible as isLower works
193// for all lowercasing variants. All lowercase variants only vary in how they
194// transform a non-lowercase letter. They will never change an already lowercase
195// letter. In addition, there is no state.
196func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
197	c := context{src: src, atEOF: atEOF}
198	for c.next() && isLower(&c) {
199		c.checkpoint()
200	}
201	return c.retSpan()
202}
203
204type simpleCaser struct {
205	context
206	f    mapFunc
207	span spanFunc
208}
209
210// simpleCaser implements the Transformer interface for doing a case operation
211// on a rune-by-rune basis.
212func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
213	c := context{dst: dst, src: src, atEOF: atEOF}
214	for c.next() && t.f(&c) {
215		c.checkpoint()
216	}
217	return c.ret()
218}
219
220func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
221	c := context{src: src, atEOF: atEOF}
222	for c.next() && t.span(&c) {
223		c.checkpoint()
224	}
225	return c.retSpan()
226}
227
228// undLowerCaser implements the Transformer interface for doing a lower case
229// mapping for the root locale (und) ignoring final sigma handling. This casing
230// algorithm is used in some performance-critical packages like secure/precis
231// and x/net/http/idna, which warrants its special-casing.
232type undLowerCaser struct{ transform.NopResetter }
233
234func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
235	c := context{dst: dst, src: src, atEOF: atEOF}
236
237	for isInterWord := true; c.next(); {
238		if isInterWord {
239			if c.info.isCased() {
240				if !lower(&c) {
241					break
242				}
243				isInterWord = false
244			} else if !c.copy() {
245				break
246			}
247		} else {
248			if c.info.isNotCasedAndNotCaseIgnorable() {
249				if !c.copy() {
250					break
251				}
252				isInterWord = true
253			} else if !c.hasPrefix("Σ") {
254				if !lower(&c) {
255					break
256				}
257			} else if !finalSigmaBody(&c) {
258				break
259			}
260		}
261		c.checkpoint()
262	}
263	return c.ret()
264}
265
266func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
267	c := context{src: src, atEOF: atEOF}
268	for c.next() && isLower(&c) {
269		c.checkpoint()
270	}
271	return c.retSpan()
272}
273
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
//
// No Span method is declared for lowerCaser in this file; presumably it is
// provided by the embedded undLowerIgnoreSigmaCaser.
type lowerCaser struct {
	undLowerIgnoreSigmaCaser

	// context is overwritten at the start of every Transform call.
	context

	// first maps the first cased rune of a word; midWord maps the cased and
	// case-ignorable runes that follow it.
	first, midWord mapFunc
}
284
285func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
286	t.context = context{dst: dst, src: src, atEOF: atEOF}
287	c := &t.context
288
289	for isInterWord := true; c.next(); {
290		if isInterWord {
291			if c.info.isCased() {
292				if !t.first(c) {
293					break
294				}
295				isInterWord = false
296			} else if !c.copy() {
297				break
298			}
299		} else {
300			if c.info.isNotCasedAndNotCaseIgnorable() {
301				if !c.copy() {
302					break
303				}
304				isInterWord = true
305			} else if !t.midWord(c) {
306				break
307			}
308		}
309		c.checkpoint()
310	}
311	return c.ret()
312}
313
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
	// context is rebuilt on every Transform and Span call; only its isMidWord
	// flag is carried over from the previous call.
	context

	// rune mappings used by the actual casing algorithms.
	title     mapFunc  // maps the first cased rune of a word
	lower     mapFunc  // maps the remaining cased runes of a word
	titleSpan spanFunc // span counterpart of title

	// rewrite, if not nil, is called on every rune before it is mapped.
	rewrite func(*context)
}
327
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
//
// The isMidWord flag left behind by the previous call is carried over into the
// fresh context, so the word state survives across calls.
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		// Remember the properties of the current rune: the calls below may
		// advance the context to a later rune.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				// First cased rune of a word.
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful write.
		// No checkpoint is taken for a Mid rune inside a word: whether it
		// ends the word depends on the rune that follows (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two Mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.ret()
}
382
// Span mirrors Transform without writing any output: the first cased rune of
// a word is checked with t.titleSpan and the remaining cased runes with
// isLower. The isMidWord flag left behind by the previous call is carried over
// into the fresh context.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Remember the properties of the current rune: the calls below may
		// advance the context to a later rune.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				// First cased rune of a word.
				if !t.titleSpan(c) {
					break
				}
				c.isMidWord = true
			} else if !isLower(c) {
				break
			}
		} else if p.isBreak() {
			c.isMidWord = false
		}
		// As we save the state of the transformer, it is safe to call
		// checkpoint after any rune that was accepted. No checkpoint is taken
		// for a Mid rune inside a word: whether it ends the word depends on
		// the rune that follows (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two Mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}
427
428// finalSigma adds Greek final Sigma handing to another casing function. It
429// determines whether a lowercased sigma should be σ or ς, by looking ahead for
430// case-ignorables and a cased letters.
431func finalSigma(f mapFunc) mapFunc {
432	return func(c *context) bool {
433		if !c.hasPrefix("Σ") {
434			return f(c)
435		}
436		return finalSigmaBody(c)
437	}
438}
439
// finalSigmaBody lower-cases the Σ the context is positioned at. It first
// writes the final form ς, copies the case-ignorable runes that follow, and
// patches the written rune to σ when the first rune that is not case-ignorable
// turns out to be cased. It returns false when the source runs out before the
// decision can be made.
func finalSigmaBody(c *context) bool {
	// Current rune must be Σ.

	// ::NFD();
	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	// Σ } [:case-ignorable:]* [:cased:] → σ;
	// [:cased:] [:case-ignorable:]* { Σ → ς;
	// ::Any-Lower;
	// ::NFC();

	// Remember where the sigma is written so that it can be patched later.
	p := c.pDst
	c.writeString("ς")

	// TODO: we should do this here, but right now this will never have an
	// effect as this is called when the prefix is Sigma, whereas Dutch and
	// Afrikaans only test for an apostrophe.
	//
	// if t.rewrite != nil {
	// 	t.rewrite(c)
	// }

	// We need to do one more iteration after maxIgnorable, as a cased
	// letter is not an ignorable and may modify the result.
	wasMid := false
	for i := 0; i < maxIgnorable+1; i++ {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			// All Midword runes are also case ignorable, so we are
			// guaranteed to have a letter or word break here. As we are
			// unreading the rune, there is no need to unset c.isMidWord;
			// the title caser will handle this.
			if c.info.isCased() {
				// p+1 is guaranteed to be in bounds: if writing ς was
				// successful, p+1 will contain the second byte of ς. If not,
				// this function will have returned after c.next returned false.
				c.dst[p+1]++ // ς → σ
			}
			c.unreadRune()
			return true
		}
		// A case ignorable may also introduce a word break, so we may need
		// to continue searching even after detecting a break.
		isMid := c.info.isMid()
		if (wasMid && isMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		wasMid = isMid
		c.copy()
	}
	// Lookahead limit reached: keep the final form ς that was written.
	return true
}
493
494// finalSigmaSpan would be the same as isLower.
495
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
//
// It returns false when upper fails, or when the source runs out while still
// looking for further modifiers before the maxIgnorable limit is reached.
func elUpper(c *context) bool {
	// From CLDR:
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;

	// Decode the source rune before upper may advance the context.
	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	// Only Greek runes get their modifiers stripped.
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	i := 0
	// Take the properties of the uppercased rune that is already written to the
	// destination. This saves us the trouble of having to uppercase the
	// decomposed rune again.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		// Restore the destination position and process the decomposed rune.
		r, sz := utf8.DecodeRune(b)
		if r <= 0xFF { // See A.6.1
			return true
		}
		c.pDst = oldPDst
		// Insert the first rune and ignore the modifiers. See A.6.2.
		c.writeBytes(b[:sz])
		// Count the dropped modifiers against the lookahead limit.
		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
		// Above and Iota Subscript
		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// No-op. Gobble the modifier.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				// Not a modifier: leave it for the next mapping call.
				c.unreadRune()
				return true

			// We don't need to test for IotaSubscript as the only rune that
			// qualifies (U+0345) was already excluded in the switch statement
			// above. See A.4.

			case cccAbove:
				// Keep this modifier and stop looking for further ones.
				return c.copy()
			default:
				// Some other modifier. We're still allowed to gobble Greek
				// modifiers after this.
				c.copy()
			}
		}
	}
	// A checkpoint is only possible if the loop ended because the lookahead
	// limit was reached, not because c.next reported failure.
	return i == maxIgnorable
}
564
565// TODO: implement elUpperSpan (low-priority: complex and infrequent).
566
// ltLower implements Lithuanian lower casing, which inserts an explicit
// U+0307 COMBINING DOT ABOVE after a lowercased I or J (including precomposed
// forms that decompose to one) when an accent with CCC Above follows.
//
// It returns false when a write fails, or when the source runs out while still
// looking for an Above modifier before the maxIgnorable limit is reached.
func ltLower(c *context) bool {
	// From CLDR:
	// # Introduce an explicit dot above when lowercasing capital I's and J's
	// # whenever there are more accents above.
	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	// ::NFD();
	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
	// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
	// I \u0300 (Ì) → i \u0307 \u0300;
	// I \u0301 (Í) → i \u0307 \u0301;
	// I \u0303 (Ĩ) → i \u0307 \u0303;
	// ::Any-Lower();
	// ::NFC();

	// i counts the modifiers seen so far against the lookahead limit.
	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		// ASCII: only I and J need the lookahead below.
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// UTF-8 optimization: the decomposition will only have an above
			// modifier if the last rune of the decomposition is in [U+300-U+311].
			// In all other cases, a decomposition starting with I is always
			// an I followed by modifiers that are not cased themselves. See A.2.
			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
				if !c.writeBytes(d[:1]) {
					return false
				}
				c.dst[c.pDst-1] += 'a' - 'A' // lower

				// Assumption: modifier never changes on lowercase. See A.1.
				// Assumption: all modifiers added have CCC = Above. See A.2.3.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}
			// In all other cases the additional modifiers will have a CCC
			// that is less than 230 (Above). We will insert the U+0307, if
			// needed, after these modifiers so that a string in FCD form
			// will remain so. See A.2.2.
			lower(c)
			i = 1
		} else {
			// Not a precomposed I or J: plain lower casing.
			return lower(c)
		}
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			// No Above modifier follows: leave this rune for the next call.
			c.unreadRune()
			return true
		case cccAbove:
			// Insert the explicit dot before the first Above modifier.
			return c.writeString("\u0307") && c.copy() // See A.1.
		default:
			c.copy() // See A.1.
		}
	}
	// A checkpoint is only possible if the loop ended because the lookahead
	// limit was reached, not because c.next reported failure.
	return i == maxIgnorable
}
635
636// ltLowerSpan would be the same as isLower.
637
638func ltUpper(f mapFunc) mapFunc {
639	return func(c *context) bool {
640		// Unicode:
641		// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
642		//
643		// From CLDR:
644		// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
645		// # intervening non-230 marks.
646		// ::NFD();
647		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
648		// ::Any-Upper();
649		// ::NFC();
650
651		// TODO: See A.5. A soft-dotted rune never has an exception. This would
652		// allow us to overload the exception bit and encode this property in
653		// info. Need to measure performance impact of this.
654		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
655		oldPDst := c.pDst
656		if !f(c) {
657			return false
658		}
659		if !unicode.Is(unicode.Soft_Dotted, r) {
660			return true
661		}
662
663		// We don't need to do an NFD normalization, as a soft-dotted rune never
664		// contains U+0307. See A.3.
665
666		i := 0
667		for ; i < maxIgnorable && c.next(); i++ {
668			switch c.info.cccType() {
669			case cccZero:
670				c.unreadRune()
671				return true
672			case cccAbove:
673				if c.hasPrefix("\u0307") {
674					// We don't do a full NFC, but rather combine runes for
675					// some of the common cases. (Returning NFC or
676					// preserving normal form is neither a requirement nor
677					// a possibility anyway).
678					if !c.next() {
679						return false
680					}
681					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
682						s := ""
683						switch c.src[c.pSrc+1] {
684						case 0x80: // U+0300 COMBINING GRAVE ACCENT
685							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
686						case 0x81: // U+0301 COMBINING ACUTE ACCENT
687							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
688						case 0x83: // U+0303 COMBINING TILDE
689							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
690						case 0x88: // U+0308 COMBINING DIAERESIS
691							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
692						default:
693						}
694						if s != "" {
695							c.pDst = oldPDst
696							return c.writeString(s)
697						}
698					}
699				}
700				return c.copy()
701			default:
702				c.copy()
703			}
704		}
705		return i == maxIgnorable
706	}
707}
708
709// TODO: implement ltUpperSpan (low priority: complex and infrequent).
710
711func aztrUpper(f mapFunc) mapFunc {
712	return func(c *context) bool {
713		// i→İ;
714		if c.src[c.pSrc] == 'i' {
715			return c.writeString("İ")
716		}
717		return f(c)
718	}
719}
720
// aztrLower implements Turkish and Azeri lower casing: İ becomes i, an I that
// is followed by U+0307 COMBINING DOT ABOVE (with possible intervening
// modifiers) becomes i with the U+0307 removed, and any other I becomes the
// dotless ı. All other runes are passed on to lower.
//
// The result done reports whether a checkpoint is possible. It stays false
// when the source runs out while still looking for U+0307 before the
// maxIgnorable limit is reached.
func aztrLower(c *context) (done bool) {
	// From CLDR:
	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	// İ→i;
	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	// # This matches the behavior of the canonically equivalent I-dot_above
	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
	// I→ı ;
	// ::Any-Lower();
	if c.hasPrefix("\u0130") { // İ
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// We ignore the lower-case I for now, but insert it later when we know
	// which form we need.
	// start is the source offset just past the I; the modifiers skipped by the
	// loop below are written from there once the starter is known.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// We check for up to n ignorables before \u0307. As \u0307 is an
	// ignorable as well, n is maxIgnorable-1.
	// NOTE(review): the loop condition below bounds i by maxIgnorable itself —
	// confirm which limit is intended.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				// Dotted form: write i plus the skipped modifiers and drop
				// the U+0307 itself.
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
			}
			// Some other Above modifier: no U+0307 can follow.
			done = true
			break Loop
		case cccZero:
			// Not a modifier: leave it for the next mapping call.
			c.unreadRune()
			done = true
			break Loop
		default:
			// We'll write this rune after we know which starter to use.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	// Dotless form: write ı followed by everything consumed after the I.
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
770
771// aztrLowerSpan would be the same as isLower.
772
773func nlTitle(c *context) bool {
774	// From CLDR:
775	// # Special titlecasing for Dutch initial "ij".
776	// ::Any-Title();
777	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
778	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
779	if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
780		return title(c)
781	}
782
783	if !c.writeString("I") || !c.next() {
784		return false
785	}
786	if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
787		return c.writeString("J")
788	}
789	c.unreadRune()
790	return true
791}
792
793func nlTitleSpan(c *context) bool {
794	// From CLDR:
795	// # Special titlecasing for Dutch initial "ij".
796	// ::Any-Title();
797	// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
798	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
799	if c.src[c.pSrc] != 'I' {
800		return isTitle(c)
801	}
802	if !c.next() || c.src[c.pSrc] == 'j' {
803		return false
804	}
805	if c.src[c.pSrc] != 'J' {
806		c.unreadRune()
807	}
808	return true
809}
810
811// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
812func afnlRewrite(c *context) {
813	if c.hasPrefix("'") || c.hasPrefix("’") {
814		c.isMidWord = true
815	}
816}