1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
9 "unicode/utf8"
10)
11
12const (
13 caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
14 kelvin = '\u212a'
15 smallLongEss = '\u017f'
16)
17
18// foldFunc returns one of four different case folding equivalence
19// functions, from most general (and slow) to fastest:
20//
21// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
22// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
23// 3) asciiEqualFold, no special, but includes non-letters (including _)
24// 4) simpleLetterEqualFold, no specials, no non-letters.
25//
26// The letters S and K are special because they map to 3 runes, not just 2:
27// - S maps to s and to U+017F 'ſ' Latin small letter long s
28// - k maps to K and to U+212A 'K' Kelvin sign
29//
30// See https://play.golang.org/p/tTxjOc0OGo
31//
32// The returned function is specialized for matching against s and
33// should only be given s. It's not curried for performance reasons.
34func foldFunc(s []byte) func(s, t []byte) bool {
35 nonLetter := false
36 special := false // special letter
37 for _, b := range s {
38 if b >= utf8.RuneSelf {
39 return bytes.EqualFold
40 }
41 upper := b & caseMask
42 if upper < 'A' || upper > 'Z' {
43 nonLetter = true
44 } else if upper == 'K' || upper == 'S' {
45 // See above for why these letters are special.
46 special = true
47 }
48 }
49 if special {
50 return equalFoldRight
51 }
52 if nonLetter {
53 return asciiEqualFold
54 }
55 return simpleLetterEqualFold
56}
57
58// equalFoldRight is a specialization of bytes.EqualFold when s is
59// known to be all ASCII (including punctuation), but contains an 's',
60// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
61// See comments on foldFunc.
62func equalFoldRight(s, t []byte) bool {
63 for _, sb := range s {
64 if len(t) == 0 {
65 return false
66 }
67 tb := t[0]
68 if tb < utf8.RuneSelf {
69 if sb != tb {
70 sbUpper := sb & caseMask
71 if 'A' <= sbUpper && sbUpper <= 'Z' {
72 if sbUpper != tb&caseMask {
73 return false
74 }
75 } else {
76 return false
77 }
78 }
79 t = t[1:]
80 continue
81 }
82 // sb is ASCII and t is not. t must be either kelvin
83 // sign or long s; sb must be s, S, k, or K.
84 tr, size := utf8.DecodeRune(t)
85 switch sb {
86 case 's', 'S':
87 if tr != smallLongEss {
88 return false
89 }
90 case 'k', 'K':
91 if tr != kelvin {
92 return false
93 }
94 default:
95 return false
96 }
97 t = t[size:]
98
99 }
100 if len(t) > 0 {
101 return false
102 }
103 return true
104}
105
106// asciiEqualFold is a specialization of bytes.EqualFold for use when
107// s is all ASCII (but may contain non-letters) and contains no
108// special-folding letters.
109// See comments on foldFunc.
110func asciiEqualFold(s, t []byte) bool {
111 if len(s) != len(t) {
112 return false
113 }
114 for i, sb := range s {
115 tb := t[i]
116 if sb == tb {
117 continue
118 }
119 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
120 if sb&caseMask != tb&caseMask {
121 return false
122 }
123 } else {
124 return false
125 }
126 }
127 return true
128}
129
130// simpleLetterEqualFold is a specialization of bytes.EqualFold for
131// use when s is all ASCII letters (no underscores, etc) and also
132// doesn't contain 'k', 'K', 's', or 'S'.
133// See comments on foldFunc.
134func simpleLetterEqualFold(s, t []byte) bool {
135 if len(s) != len(t) {
136 return false
137 }
138 for i, b := range s {
139 if b&caseMask != t[i]&caseMask {
140 return false
141 }
142 }
143 return true