1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package strings
6
7import (
8 "io"
9 "sync"
10)
11
12// Replacer replaces a list of strings with replacements.
13// It is safe for concurrent use by multiple goroutines.
14type Replacer struct {
15 once sync.Once // guards buildOnce method
16 r replacer
17 oldnew []string
18}
19
20// replacer is the interface that a replacement algorithm needs to implement.
21type replacer interface {
22 Replace(s string) string
23 WriteString(w io.Writer, s string) (n int, err error)
24}
25
26// NewReplacer returns a new Replacer from a list of old, new string
27// pairs. Replacements are performed in the order they appear in the
28// target string, without overlapping matches. The old string
29// comparisons are done in argument order.
30//
31// NewReplacer panics if given an odd number of arguments.
32func NewReplacer(oldnew ...string) *Replacer {
33 if len(oldnew)%2 == 1 {
34 panic("strings.NewReplacer: odd argument count")
35 }
36 return &Replacer{oldnew: append([]string(nil), oldnew...)}
37}
38
39func (r *Replacer) buildOnce() {
40 r.r = r.build()
41 r.oldnew = nil
42}
43
44func (b *Replacer) build() replacer {
45 oldnew := b.oldnew
46 if len(oldnew) == 2 && len(oldnew[0]) > 1 {
47 return makeSingleStringReplacer(oldnew[0], oldnew[1])
48 }
49
50 allNewBytes := true
51 for i := 0; i < len(oldnew); i += 2 {
52 if len(oldnew[i]) != 1 {
53 return makeGenericReplacer(oldnew)
54 }
55 if len(oldnew[i+1]) != 1 {
56 allNewBytes = false
57 }
58 }
59
60 if allNewBytes {
61 r := byteReplacer{}
62 for i := range r {
63 r[i] = byte(i)
64 }
65 // The first occurrence of old->new map takes precedence
66 // over the others with the same old string.
67 for i := len(oldnew) - 2; i >= 0; i -= 2 {
68 o := oldnew[i][0]
69 n := oldnew[i+1][0]
70 r[o] = n
71 }
72 return &r
73 }
74
75 r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
76 // The first occurrence of old->new map takes precedence
77 // over the others with the same old string.
78 for i := len(oldnew) - 2; i >= 0; i -= 2 {
79 o := oldnew[i][0]
80 n := oldnew[i+1]
81 // To avoid counting repetitions multiple times.
82 if r.replacements[o] == nil {
83 // We need to use string([]byte{o}) instead of string(o),
84 // to avoid utf8 encoding of o.
85 // E. g. byte(150) produces string of length 2.
86 r.toReplace = append(r.toReplace, string([]byte{o}))
87 }
88 r.replacements[o] = []byte(n)
89
90 }
91 return &r
92}
93
94// Replace returns a copy of s with all replacements performed.
95func (r *Replacer) Replace(s string) string {
96 r.once.Do(r.buildOnce)
97 return r.r.Replace(s)
98}
99
100// WriteString writes s to w with all replacements performed.
101func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
102 r.once.Do(r.buildOnce)
103 return r.r.WriteString(w, s)
104}
105
106// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
107// and values may be empty. For example, the trie containing keys "ax", "ay",
108// "bcbc", "x" and "xy" could have eight nodes:
109//
110// n0 -
111// n1 a-
112// n2 .x+
113// n3 .y+
114// n4 b-
115// n5 .cbc+
116// n6 x+
117// n7 .y+
118//
119// n0 is the root node, and its children are n1, n4 and n6; n1's children are
120// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
121// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
122// (marked with a trailing "+") are complete keys.
123type trieNode struct {
124 // value is the value of the trie node's key/value pair. It is empty if
125 // this node is not a complete key.
126 value string
127 // priority is the priority (higher is more important) of the trie node's
128 // key/value pair; keys are not necessarily matched shortest- or longest-
129 // first. Priority is positive if this node is a complete key, and zero
130 // otherwise. In the example above, positive/zero priorities are marked
131 // with a trailing "+" or "-".
132 priority int
133
134 // A trie node may have zero, one or more child nodes:
135 // * if the remaining fields are zero, there are no children.
136 // * if prefix and next are non-zero, there is one child in next.
137 // * if table is non-zero, it defines all the children.
138 //
139 // Prefixes are preferred over tables when there is one child, but the
140 // root node always uses a table for lookup efficiency.
141
142 // prefix is the difference in keys between this trie node and the next.
143 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
144 // Node n5 has no children and so has zero prefix, next and table fields.
145 prefix string
146 next *trieNode
147
148 // table is a lookup table indexed by the next byte in the key, after
149 // remapping that byte through genericReplacer.mapping to create a dense
150 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
151 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
152 // genericReplacer.tableSize will be 5. Node n0's table will be
153 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
154 // 'a', 'b' and 'x'.
155 table []*trieNode
156}
157
158func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
159 if key == "" {
160 if t.priority == 0 {
161 t.value = val
162 t.priority = priority
163 }
164 return
165 }
166
167 if t.prefix != "" {
168 // Need to split the prefix among multiple nodes.
169 var n int // length of the longest common prefix
170 for ; n < len(t.prefix) && n < len(key); n++ {
171 if t.prefix[n] != key[n] {
172 break
173 }
174 }
175 if n == len(t.prefix) {
176 t.next.add(key[n:], val, priority, r)
177 } else if n == 0 {
178 // First byte differs, start a new lookup table here. Looking up
179 // what is currently t.prefix[0] will lead to prefixNode, and
180 // looking up key[0] will lead to keyNode.
181 var prefixNode *trieNode
182 if len(t.prefix) == 1 {
183 prefixNode = t.next
184 } else {
185 prefixNode = &trieNode{
186 prefix: t.prefix[1:],
187 next: t.next,
188 }
189 }
190 keyNode := new(trieNode)
191 t.table = make([]*trieNode, r.tableSize)
192 t.table[r.mapping[t.prefix[0]]] = prefixNode
193 t.table[r.mapping[key[0]]] = keyNode
194 t.prefix = ""
195 t.next = nil
196 keyNode.add(key[1:], val, priority, r)
197 } else {
198 // Insert new node after the common section of the prefix.
199 next := &trieNode{
200 prefix: t.prefix[n:],
201 next: t.next,
202 }
203 t.prefix = t.prefix[:n]
204 t.next = next
205 next.add(key[n:], val, priority, r)
206 }
207 } else if t.table != nil {
208 // Insert into existing table.
209 m := r.mapping[key[0]]
210 if t.table[m] == nil {
211 t.table[m] = new(trieNode)
212 }
213 t.table[m].add(key[1:], val, priority, r)
214 } else {
215 t.prefix = key
216 t.next = new(trieNode)
217 t.next.add("", val, priority, r)
218 }
219}
220
221func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
222 // Iterate down the trie to the end, and grab the value and keylen with
223 // the highest priority.
224 bestPriority := 0
225 node := &r.root
226 n := 0
227 for node != nil {
228 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
229 bestPriority = node.priority
230 val = node.value
231 keylen = n
232 found = true
233 }
234
235 if s == "" {
236 break
237 }
238 if node.table != nil {
239 index := r.mapping[s[0]]
240 if int(index) == r.tableSize {
241 break
242 }
243 node = node.table[index]
244 s = s[1:]
245 n++
246 } else if node.prefix != "" && HasPrefix(s, node.prefix) {
247 n += len(node.prefix)
248 s = s[len(node.prefix):]
249 node = node.next
250 } else {
251 break
252 }
253 }
254 return
255}
256
257// genericReplacer is the fully generic algorithm.
258// It's used as a fallback when nothing faster can be used.
259type genericReplacer struct {
260 root trieNode
261 // tableSize is the size of a trie node's lookup table. It is the number
262 // of unique key bytes.
263 tableSize int
264 // mapping maps from key bytes to a dense index for trieNode.table.
265 mapping [256]byte
266}
267
268func makeGenericReplacer(oldnew []string) *genericReplacer {
269 r := new(genericReplacer)
270 // Find each byte used, then assign them each an index.
271 for i := 0; i < len(oldnew); i += 2 {
272 key := oldnew[i]
273 for j := 0; j < len(key); j++ {
274 r.mapping[key[j]] = 1
275 }
276 }
277
278 for _, b := range r.mapping {
279 r.tableSize += int(b)
280 }
281
282 var index byte
283 for i, b := range r.mapping {
284 if b == 0 {
285 r.mapping[i] = byte(r.tableSize)
286 } else {
287 r.mapping[i] = index
288 index++
289 }
290 }
291 // Ensure root node uses a lookup table (for performance).
292 r.root.table = make([]*trieNode, r.tableSize)
293
294 for i := 0; i < len(oldnew); i += 2 {
295 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
296 }
297 return r
298}
299
300type appendSliceWriter []byte
301
302// Write writes to the buffer to satisfy io.Writer.
303func (w *appendSliceWriter) Write(p []byte) (int, error) {
304 *w = append(*w, p...)
305 return len(p), nil
306}
307
308// WriteString writes to the buffer without string->[]byte->string allocations.
309func (w *appendSliceWriter) WriteString(s string) (int, error) {
310 *w = append(*w, s...)
311 return len(s), nil
312}
313
314type stringWriter struct {
315 w io.Writer
316}
317
318func (w stringWriter) WriteString(s string) (int, error) {
319 return w.w.Write([]byte(s))
320}
321
322func getStringWriter(w io.Writer) io.StringWriter {
323 sw, ok := w.(io.StringWriter)
324 if !ok {
325 sw = stringWriter{w}
326 }
327 return sw
328}
329
330func (r *genericReplacer) Replace(s string) string {
331 buf := make(appendSliceWriter, 0, len(s))
332 r.WriteString(&buf, s)
333 return string(buf)
334}
335
336func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
337 sw := getStringWriter(w)
338 var last, wn int
339 var prevMatchEmpty bool
340 for i := 0; i <= len(s); {
341 // Fast path: s[i] is not a prefix of any pattern.
342 if i != len(s) && r.root.priority == 0 {
343 index := int(r.mapping[s[i]])
344 if index == r.tableSize || r.root.table[index] == nil {
345 i++
346 continue
347 }
348 }
349
350 // Ignore the empty match iff the previous loop found the empty match.
351 val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
352 prevMatchEmpty = match && keylen == 0
353 if match {
354 wn, err = sw.WriteString(s[last:i])
355 n += wn
356 if err != nil {
357 return
358 }
359 wn, err = sw.WriteString(val)
360 n += wn
361 if err != nil {
362 return
363 }
364 i += keylen
365 last = i
366 continue
367 }
368 i++
369 }
370 if last != len(s) {
371 wn, err = sw.WriteString(s[last:])
372 n += wn
373 }
374 return
375}
376
377// singleStringReplacer is the implementation that's used when there is only
378// one string to replace (and that string has more than one byte).
379type singleStringReplacer struct {
380 finder *stringFinder
381 // value is the new string that replaces that pattern when it's found.
382 value string
383}
384
385func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
386 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
387}
388
389func (r *singleStringReplacer) Replace(s string) string {
390 var buf Builder
391 i, matched := 0, false
392 for {
393 match := r.finder.next(s[i:])
394 if match == -1 {
395 break
396 }
397 matched = true
398 buf.Grow(match + len(r.value))
399 buf.WriteString(s[i : i+match])
400 buf.WriteString(r.value)
401 i += match + len(r.finder.pattern)
402 }
403 if !matched {
404 return s
405 }
406 buf.WriteString(s[i:])
407 return buf.String()
408}
409
410func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
411 sw := getStringWriter(w)
412 var i, wn int
413 for {
414 match := r.finder.next(s[i:])
415 if match == -1 {
416 break
417 }
418 wn, err = sw.WriteString(s[i : i+match])
419 n += wn
420 if err != nil {
421 return
422 }
423 wn, err = sw.WriteString(r.value)
424 n += wn
425 if err != nil {
426 return
427 }
428 i += match + len(r.finder.pattern)
429 }
430 wn, err = sw.WriteString(s[i:])
431 n += wn
432 return
433}
434
435// byteReplacer is the implementation that's used when all the "old"
436// and "new" values are single ASCII bytes.
437// The array contains replacement bytes indexed by old byte.
438type byteReplacer [256]byte
439
440func (r *byteReplacer) Replace(s string) string {
441 var buf []byte // lazily allocated
442 for i := 0; i < len(s); i++ {
443 b := s[i]
444 if r[b] != b {
445 if buf == nil {
446 buf = []byte(s)
447 }
448 buf[i] = r[b]
449 }
450 }
451 if buf == nil {
452 return s
453 }
454 return string(buf)
455}
456
457func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
458 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
459 bufsize := 32 << 10
460 if len(s) < bufsize {
461 bufsize = len(s)
462 }
463 buf := make([]byte, bufsize)
464
465 for len(s) > 0 {
466 ncopy := copy(buf, s)
467 s = s[ncopy:]
468 for i, b := range buf[:ncopy] {
469 buf[i] = r[b]
470 }
471 wn, err := w.Write(buf[:ncopy])
472 n += wn
473 if err != nil {
474 return n, err
475 }
476 }
477 return n, nil
478}
479
480// byteStringReplacer is the implementation that's used when all the
481// "old" values are single ASCII bytes but the "new" values vary in size.
482type byteStringReplacer struct {
483 // replacements contains replacement byte slices indexed by old byte.
484 // A nil []byte means that the old byte should not be replaced.
485 replacements [256][]byte
486 // toReplace keeps a list of bytes to replace. Depending on length of toReplace
487 // and length of target string it may be faster to use Count, or a plain loop.
488 // We store single byte as a string, because Count takes a string.
489 toReplace []string
490}
491
492// countCutOff controls the ratio of a string length to a number of replacements
493// at which (*byteStringReplacer).Replace switches algorithms.
494// For strings with higher ration of length to replacements than that value,
495// we call Count, for each replacement from toReplace.
496// For strings, with a lower ratio we use simple loop, because of Count overhead.
497// countCutOff is an empirically determined overhead multiplier.
498// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
499const countCutOff = 8
500
501func (r *byteStringReplacer) Replace(s string) string {
502 newSize := len(s)
503 anyChanges := false
504 // Is it faster to use Count?
505 if len(r.toReplace)*countCutOff <= len(s) {
506 for _, x := range r.toReplace {
507 if c := Count(s, x); c != 0 {
508 // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
509 newSize += c * (len(r.replacements[x[0]]) - 1)
510 anyChanges = true
511 }
512
513 }
514 } else {
515 for i := 0; i < len(s); i++ {
516 b := s[i]
517 if r.replacements[b] != nil {
518 // See above for explanation of -1
519 newSize += len(r.replacements[b]) - 1
520 anyChanges = true
521 }
522 }
523 }
524 if !anyChanges {
525 return s
526 }
527 buf := make([]byte, newSize)
528 j := 0
529 for i := 0; i < len(s); i++ {
530 b := s[i]
531 if r.replacements[b] != nil {
532 j += copy(buf[j:], r.replacements[b])
533 } else {
534 buf[j] = b
535 j++
536 }
537 }
538 return string(buf)
539}
540
541func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
542 sw := getStringWriter(w)
543 last := 0
544 for i := 0; i < len(s); i++ {
545 b := s[i]
546 if r.replacements[b] == nil {
547 continue
548 }
549 if last != i {
550 nw, err := sw.WriteString(s[last:i])
551 n += nw
552 if err != nil {
553 return n, err
554 }
555 }
556 last = i + 1
557 nw, err := w.Write(r.replacements[b])
558 n += nw
559 if err != nil {
560 return n, err
561 }
562 }
563 if last != len(s) {
564 var nw int
565 nw, err = sw.WriteString(s[last:])
566 n += nw
567 }
568 return
569}