fold.go

1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8	"bytes"
9	"unicode/utf8"
10)
11
12const (
13	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
14	kelvin       = '\u212a'
15	smallLongEss = '\u017f'
16)
17
18// foldFunc returns one of four different case folding equivalence
19// functions, from most general (and slow) to fastest:
20//
21// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
22// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
23// 3) asciiEqualFold, no special, but includes non-letters (including _)
24// 4) simpleLetterEqualFold, no specials, no non-letters.
25//
26// The letters S and K are special because they map to 3 runes, not just 2:
27//   - S maps to s and to U+017F 'ſ' Latin small letter long s
28//   - k maps to K and to U+212A 'K' Kelvin sign
29//
30// See https://play.golang.org/p/tTxjOc0OGo
31//
32// The returned function is specialized for matching against s and
33// should only be given s. It's not curried for performance reasons.
34func foldFunc(s []byte) func(s, t []byte) bool {
35	nonLetter := false
36	special := false // special letter
37	for _, b := range s {
38		if b >= utf8.RuneSelf {
39			return bytes.EqualFold
40		}
41		upper := b & caseMask
42		if upper < 'A' || upper > 'Z' {
43			nonLetter = true
44		} else if upper == 'K' || upper == 'S' {
45			// See above for why these letters are special.
46			special = true
47		}
48	}
49	if special {
50		return equalFoldRight
51	}
52	if nonLetter {
53		return asciiEqualFold
54	}
55	return simpleLetterEqualFold
56}
57
58// equalFoldRight is a specialization of bytes.EqualFold when s is
59// known to be all ASCII (including punctuation), but contains an 's',
60// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
61// See comments on foldFunc.
62func equalFoldRight(s, t []byte) bool {
63	for _, sb := range s {
64		if len(t) == 0 {
65			return false
66		}
67		tb := t[0]
68		if tb < utf8.RuneSelf {
69			if sb != tb {
70				sbUpper := sb & caseMask
71				if 'A' <= sbUpper && sbUpper <= 'Z' {
72					if sbUpper != tb&caseMask {
73						return false
74					}
75				} else {
76					return false
77				}
78			}
79			t = t[1:]
80			continue
81		}
82		// sb is ASCII and t is not. t must be either kelvin
83		// sign or long s; sb must be s, S, k, or K.
84		tr, size := utf8.DecodeRune(t)
85		switch sb {
86		case 's', 'S':
87			if tr != smallLongEss {
88				return false
89			}
90		case 'k', 'K':
91			if tr != kelvin {
92				return false
93			}
94		default:
95			return false
96		}
97		t = t[size:]
98
99	}
100	if len(t) > 0 {
101		return false
102	}
103	return true
104}
105
106// asciiEqualFold is a specialization of bytes.EqualFold for use when
107// s is all ASCII (but may contain non-letters) and contains no
108// special-folding letters.
109// See comments on foldFunc.
110func asciiEqualFold(s, t []byte) bool {
111	if len(s) != len(t) {
112		return false
113	}
114	for i, sb := range s {
115		tb := t[i]
116		if sb == tb {
117			continue
118		}
119		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
120			if sb&caseMask != tb&caseMask {
121				return false
122			}
123		} else {
124			return false
125		}
126	}
127	return true
128}
129
130// simpleLetterEqualFold is a specialization of bytes.EqualFold for
131// use when s is all ASCII letters (no underscores, etc) and also
132// doesn't contain 'k', 'K', 's', or 'S'.
133// See comments on foldFunc.
134func simpleLetterEqualFold(s, t []byte) bool {
135	if len(s) != len(t) {
136		return false
137	}
138	for i, b := range s {
139		if b&caseMask != t[i]&caseMask {
140			return false
141		}
142	}
143	return true