match.go

  1// Copyright 2013 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package language
  6
  7import "errors"
  8
// scriptRegionFlags is an 8-bit flag set.
// NOTE(review): presumably the type of the flags field of the likely-subtags
// table entries, which are declared elsewhere — confirm.
type scriptRegionFlags uint8

// Flag bits tested against the flags field of likely-subtags table entries.
// The constants carry no explicit type, so they are untyped integer constants
// with the values 1, 2 and 4.
const (
	// isList marks an entry that does not hold a single match but refers to
	// a run of entries in the corresponding list table. addTags then reads
	// one field of the entry as the start index and the script field as the
	// number of entries in the run.
	isList = 1 << iota
	// scriptInFrom is required by addTags for a lang-script match and must
	// be clear for a lang-region match.
	// NOTE(review): presumably it records that the script was part of the
	// key of the likely-subtags rule — confirm against the table generator.
	scriptInFrom
	// regionInFrom is not referenced in the visible part of this file.
	// NOTE(review): presumably the region counterpart of scriptInFrom —
	// confirm.
	regionInFrom
)
 16
 17func (t *Tag) setUndefinedLang(id Language) {
 18	if t.LangID == 0 {
 19		t.LangID = id
 20	}
 21}
 22
 23func (t *Tag) setUndefinedScript(id Script) {
 24	if t.ScriptID == 0 {
 25		t.ScriptID = id
 26	}
 27}
 28
 29func (t *Tag) setUndefinedRegion(id Region) {
 30	if t.RegionID == 0 || t.RegionID.Contains(id) {
 31		t.RegionID = id
 32	}
 33}
 34
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. addTags returns it when none of
// its lookups match and the language ID is not below langNoIndexOffset.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
 38
 39// addLikelySubtags sets subtags to their most likely value, given the locale.
 40// In most cases this means setting fields for unknown values, but in some
 41// cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
 42// if the given locale cannot be expanded.
 43func (t Tag) addLikelySubtags() (Tag, error) {
 44	id, err := addTags(t)
 45	if err != nil {
 46		return t, err
 47	} else if id.equalTags(t) {
 48		return t, nil
 49	}
 50	id.RemakeString()
 51	return id, nil
 52}
 53
 54// specializeRegion attempts to specialize a group region.
 55func specializeRegion(t *Tag) bool {
 56	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
 57		x := likelyRegionGroup[i]
 58		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
 59			t.RegionID = Region(x.region)
 60		}
 61		return true
 62	}
 63	return false
 64}
 65
 66// Maximize returns a new tag with missing tags filled in.
 67func (t Tag) Maximize() (Tag, error) {
 68	return addTags(t)
 69}
 70
// addTags returns a copy of t in which undefined (zero) language, script and
// region IDs are filled in from the likely-subtags tables. t is passed by
// value, so the caller's tag is never modified.
//
// The lookups are tried in this order, and the first match wins:
//
//  1. Private-use tags are returned unchanged.
//  2. Script and region both defined: if the language is defined too, only a
//     group region is specialized; otherwise und-script-region is looked up.
//  3. Language defined: lang-script, then lang-region.
//  4. Language undefined: und-script, then und-region.
//  5. Language only; an undefined language defaults to _en.
//
// It returns ErrMissingLikelyTagsData, together with the tag as modified so
// far, when no lookup matched and the language ID is not below
// langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.IsPrivateUse() {
		return t, nil
	}
	if t.ScriptID != 0 && t.RegionID != 0 {
		if t.LangID != 0 {
			// already fully specified; the only possible change is replacing
			// a group region by a specific one.
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		// Start with a one-element slice holding the entry for this region.
		list := likelyRegion[t.RegionID : t.RegionID+1]
		if x := list[0]; x.flags&isList != 0 {
			// For a list entry, lang is the start index into
			// likelyRegionList and script is the number of entries.
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if Script(x.script) == t.ScriptID {
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
		// No entry with this script: continue with the lookups below.
	}
	if t.LangID != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.LangID < langNoIndexOffset {
			x := likelyLang[t.LangID]
			if x.flags&isList != 0 {
				// For a list entry, region is the start index into
				// likelyLangList and script is the number of entries.
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.ScriptID != 0 {
					for _, x := range list {
						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
							// setUndefinedRegion also replaces a defined
							// region that contains the matched region.
							t.setUndefinedRegion(Region(x.region))
							return t, nil
						}
					}
				} else if t.RegionID != 0 {
					// The script is undefined in this branch. tt collects the
					// candidate result while t stays untouched, so a
					// non-unique match can be discarded.
					count := 0
					goodScript := true
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
							tt.RegionID = Region(x.region)
							// Only the first matching entry sets the script;
							// goodScript stays true while every later match
							// has that same script.
							tt.setUndefinedScript(Script(x.script))
							goodScript = goodScript && tt.ScriptID == Script(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.ScriptID = tt.ScriptID
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.ScriptID != 0 {
			x := likelyScript[t.ScriptID]
			if x.region != 0 {
				t.setUndefinedRegion(Region(x.region))
				t.setUndefinedLang(Language(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.RegionID != 0 {
			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
				// The region is a group: use the likely entry for the group.
				// There is no return here, so execution continues with the
				// language-only lookup below.
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					t.RegionID = Region(x.region)
				}
			} else {
				x := likelyRegion[t.RegionID]
				if x.flags&isList != 0 {
					// Use the first entry of the list for this region.
					x = likelyRegionList[x.lang]
				}
				// NOTE(review): this compares the whole flags value with
				// scriptInFrom instead of testing the bit — confirm that
				// this is intended.
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(Language(x.lang))
					t.setUndefinedScript(Script(x.script))
					return t, nil
				}
			}
		}
	}

	// Search matches for lang.
	if t.LangID < langNoIndexOffset {
		x := likelyLang[t.LangID]
		if x.flags&isList != 0 {
			// Use the first entry of the list for this language.
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(Script(x.script))
			t.setUndefinedRegion(Region(x.region))
		}
		specializeRegion(&t)
		if t.LangID == 0 {
			t.LangID = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}
188
189func (t *Tag) setTagsFrom(id Tag) {
190	t.LangID = id.LangID
191	t.ScriptID = id.ScriptID
192	t.RegionID = id.RegionID
193}
194
195// minimize removes the region or script subtags from t such that
196// t.addLikelySubtags() == t.minimize().addLikelySubtags().
197func (t Tag) minimize() (Tag, error) {
198	t, err := minimizeTags(t)
199	if err != nil {
200		return t, err
201	}
202	t.RemakeString()
203	return t, nil
204}
205
206// minimizeTags mimics the behavior of the ICU 51 C implementation.
207func minimizeTags(t Tag) (Tag, error) {
208	if t.equalTags(Und) {
209		return t, nil
210	}
211	max, err := addTags(t)
212	if err != nil {
213		return t, err
214	}
215	for _, id := range [...]Tag{
216		{LangID: t.LangID},
217		{LangID: t.LangID, RegionID: t.RegionID},
218		{LangID: t.LangID, ScriptID: t.ScriptID},
219	} {
220		if x, err := addTags(id); err == nil && max.equalTags(x) {
221			t.setTagsFrom(id)
222			break
223		}
224	}
225	return t, nil
226}