1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package json
6
7import (
8 "bytes"
9 "errors"
10 "io"
11)
12
// A Decoder reads and decodes JSON values from an input stream.
//
// A Decoder keeps its own read buffer, so it may consume more of the
// underlying reader than the values it has returned so far.
type Decoder struct {
	r       io.Reader   // source of JSON input
	buf     []byte      // data read from r; buf[scanp:] has not been consumed yet
	d       decodeState // unmarshaling state, re-initialized by Decode for each value
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned (bytes dropped from the front of buf by refill)
	scan    scanner     // scanner used by readValue to find the end of a value
	err     error       // sticky error set by readValue; once set, Decode returns it immediately

	tokenState int   // current state of the Token state machine (one of the token* constants)
	tokenStack []int // states saved by Token on entering a nested array or object
}
26
27// NewDecoder returns a new decoder that reads from r.
28//
29// The decoder introduces its own buffering and may
30// read data from r beyond the JSON values requested.
31func NewDecoder(r io.Reader) *Decoder {
32 return &Decoder{r: r}
33}
34
35// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
36// Number instead of as a float64.
37func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
38
39// DisallowUnknownFields causes the Decoder to return an error when the destination
40// is a struct and the input contains object keys which do not match any
41// non-ignored, exported fields in the destination.
42func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
43
44// Decode reads the next JSON-encoded value from its
45// input and stores it in the value pointed to by v.
46//
47// See the documentation for Unmarshal for details about
48// the conversion of JSON into a Go value.
49func (dec *Decoder) Decode(v any) error {
50 if dec.err != nil {
51 return dec.err
52 }
53
54 if err := dec.tokenPrepareForDecode(); err != nil {
55 return err
56 }
57
58 if !dec.tokenValueAllowed() {
59 return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
60 }
61
62 // Read whole value into buffer.
63 n, err := dec.readValue()
64 if err != nil {
65 return err
66 }
67 dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
68 dec.scanp += n
69
70 // Don't save err from unmarshal into dec.err:
71 // the connection is still usable since we read a complete JSON
72 // object from it before the error happened.
73 err = dec.d.unmarshal(v)
74
75 // fixup token streaming state
76 dec.tokenValueEnd()
77
78 return err
79}
80
81// Buffered returns a reader of the data remaining in the Decoder's
82// buffer. The reader is valid until the next call to Decode.
83func (dec *Decoder) Buffered() io.Reader {
84 return bytes.NewReader(dec.buf[dec.scanp:])
85}
86
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// Scanning starts at dec.scanp. dec.scanp itself is not advanced here;
// the caller consumes the value using the returned length. Whenever the
// buffered data runs out before the value is complete, refill is called to
// read more from the underlying reader.
//
// A scanner error or a read error is stored in dec.err before it is
// returned. On io.EOF, if the scanner does not report the end of a value
// and the buffer holds any non-space byte, the error becomes
// io.ErrUnexpectedEOF.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor into dec.buf; err holds the result of the
	// most recent refill and is examined only after the buffer is scanned.
	scanp := dec.scanp
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed the scanner a space: if that ends the value, the
				// buffered bytes up to scanp are a complete value.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Input stopped without a complete value. With some
				// non-space byte in the buffer that is an unexpected EOF;
				// otherwise the plain io.EOF is reported.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of the buffer and
		// reset dec.scanp, so remember the cursor relative to dec.scanp and
		// rebuild it afterwards.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
145
146func (dec *Decoder) refill() error {
147 // Make room to read more into the buffer.
148 // First slide down data already consumed.
149 if dec.scanp > 0 {
150 dec.scanned += int64(dec.scanp)
151 n := copy(dec.buf, dec.buf[dec.scanp:])
152 dec.buf = dec.buf[:n]
153 dec.scanp = 0
154 }
155
156 // Grow buffer if not large enough.
157 const minRead = 512
158 if cap(dec.buf)-len(dec.buf) < minRead {
159 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
160 copy(newBuf, dec.buf)
161 dec.buf = newBuf
162 }
163
164 // Read. Delay error for next iteration (after scan).
165 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
166 dec.buf = dec.buf[0 : len(dec.buf)+n]
167
168 return err
169}
170
171func nonSpace(b []byte) bool {
172 for _, c := range b {
173 if !isSpace(c) {
174 return true
175 }
176 }
177 return false
178}
179
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded output
	err        error     // sticky error from a failed write; once set, Encode returns it
	escapeHTML bool      // passed to marshal as the escapeHTML option (see SetEscapeHTML)

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated by Encode on first use
	indentPrefix string        // prefix argument for Indent (see SetIndent)
	indentValue  string        // indent argument for Indent (see SetIndent)
}
190
191// NewEncoder returns a new encoder that writes to w.
192func NewEncoder(w io.Writer) *Encoder {
193 return &Encoder{w: w, escapeHTML: true}
194}
195
196// Encode writes the JSON encoding of v to the stream,
197// followed by a newline character.
198//
199// See the documentation for Marshal for details about the
200// conversion of Go values to JSON.
201func (enc *Encoder) Encode(v any) error {
202 if enc.err != nil {
203 return enc.err
204 }
205 e := newEncodeState()
206 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
207 if err != nil {
208 return err
209 }
210
211 // Terminate each value with a newline.
212 // This makes the output look a little nicer
213 // when debugging, and some kind of space
214 // is required if the encoded value was a number,
215 // so that the reader knows there aren't more
216 // digits coming.
217 e.WriteByte('\n')
218
219 b := e.Bytes()
220 if enc.indentPrefix != "" || enc.indentValue != "" {
221 if enc.indentBuf == nil {
222 enc.indentBuf = new(bytes.Buffer)
223 }
224 enc.indentBuf.Reset()
225 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
226 if err != nil {
227 return err
228 }
229 b = enc.indentBuf.Bytes()
230 }
231 if _, err = enc.w.Write(b); err != nil {
232 enc.err = err
233 }
234 encodeStatePool.Put(e)
235 return err
236}
237
238// SetIndent instructs the encoder to format each subsequent encoded
239// value as if indented by the package-level function Indent(dst, src, prefix, indent).
240// Calling SetIndent("", "") disables indentation.
241func (enc *Encoder) SetIndent(prefix, indent string) {
242 enc.indentPrefix = prefix
243 enc.indentValue = indent
244}
245
// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
//
// The setting is read by each call to Encode, so it applies to values
// encoded after SetEscapeHTML returns.
func (enc *Encoder) SetEscapeHTML(on bool) {
	enc.escapeHTML = on
}
256
// RawMessage is a raw encoded JSON value.
// It implements Marshaler and Unmarshaler and can
// be used to delay JSON decoding or precompute a JSON encoding.
//
// A nil RawMessage marshals as the JSON literal null. Unmarshaling into a
// RawMessage stores a copy of the input bytes.
type RawMessage []byte
261
262// MarshalJSON returns m as the JSON encoding of m.
263func (m RawMessage) MarshalJSON() ([]byte, error) {
264 if m == nil {
265 return []byte("null"), nil
266 }
267 return m, nil
268}
269
270// UnmarshalJSON sets *m to a copy of data.
271func (m *RawMessage) UnmarshalJSON(data []byte) error {
272 if m == nil {
273 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
274 }
275 *m = append((*m)[0:0], data...)
276 return nil
277}
278
279var _ Marshaler = (*RawMessage)(nil)
280var _ Unmarshaler = (*RawMessage)(nil)
281
// A Token holds a value of one of these types:
//
//   - Delim, for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers
//   - Number, for JSON numbers (when UseNumber has been called on the Decoder)
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
291
// States of the Token state machine, stored in Decoder.tokenState.
// Each one records what was consumed last and therefore what may come next.
const (
	tokenTopValue    = iota // initial state (zero value); a value may follow
	tokenArrayStart         // just after '['; a value or ']' may follow
	tokenArrayValue         // just after ',' in an array; a value may follow
	tokenArrayComma         // just after an array element; ',' or ']' may follow
	tokenObjectStart        // just after '{'; a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object; a key string may follow
	tokenObjectColon        // just after an object key; ':' may follow
	tokenObjectValue        // just after ':'; a value may follow
	tokenObjectComma        // just after an object value; ',' or '}' may follow
)
303
304// advance tokenstate from a separator state to a value state
305func (dec *Decoder) tokenPrepareForDecode() error {
306 // Note: Not calling peek before switch, to avoid
307 // putting peek into the standard Decode path.
308 // peek is only called when using the Token API.
309 switch dec.tokenState {
310 case tokenArrayComma:
311 c, err := dec.peek()
312 if err != nil {
313 return err
314 }
315 if c != ',' {
316 return &SyntaxError{"expected comma after array element", dec.InputOffset()}
317 }
318 dec.scanp++
319 dec.tokenState = tokenArrayValue
320 case tokenObjectColon:
321 c, err := dec.peek()
322 if err != nil {
323 return err
324 }
325 if c != ':' {
326 return &SyntaxError{"expected colon after object key", dec.InputOffset()}
327 }
328 dec.scanp++
329 dec.tokenState = tokenObjectValue
330 }
331 return nil
332}
333
334func (dec *Decoder) tokenValueAllowed() bool {
335 switch dec.tokenState {
336 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
337 return true
338 }
339 return false
340}
341
342func (dec *Decoder) tokenValueEnd() {
343 switch dec.tokenState {
344 case tokenArrayStart, tokenArrayValue:
345 dec.tokenState = tokenArrayComma
346 case tokenObjectValue:
347 dec.tokenState = tokenObjectComma
348 }
349}
350
// A Delim is a JSON array or object delimiter, one of [ ] { or }.
// Token returns a Delim for each delimiter it reads.
type Delim rune
353
354func (d Delim) String() string {
355 return string(d)
356}
357
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that a ':' or ',' can be consumed silently before the next
	// token that is actually returned.
	for {
		// peek skips white space and leaves dec.scanp on the byte it returns.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so that the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Valid only directly after '[' or after an array element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the array that just closed counts as
			// a completed value there.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so that the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Valid only directly after '{' or after a key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the object that just closed counts as
			// a completed value there.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and go around again for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Consume the comma and go around again: a value follows in an
			// array, a key follows in an object.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// The string is an object key. Decode refuses to run in the
				// key states, so switch to tokenTopValue for the duration of
				// the call and restore the real state afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			// Any other byte must begin a value; let Decode parse it and
			// advance the token state.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			var x any
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
462
463func (dec *Decoder) tokenError(c byte) (Token, error) {
464 var context string
465 switch dec.tokenState {
466 case tokenTopValue:
467 context = " looking for beginning of value"
468 case tokenArrayStart, tokenArrayValue, tokenObjectValue:
469 context = " looking for beginning of value"
470 case tokenArrayComma:
471 context = " after array element"
472 case tokenObjectKey:
473 context = " looking for beginning of object key string"
474 case tokenObjectColon:
475 context = " after object key"
476 case tokenObjectComma:
477 context = " after object key:value pair"
478 }
479 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
480}
481
482// More reports whether there is another element in the
483// current array or object being parsed.
484func (dec *Decoder) More() bool {
485 c, err := dec.peek()
486 return err == nil && c != ']' && c != '}'
487}
488
489func (dec *Decoder) peek() (byte, error) {
490 var err error
491 for {
492 for i := dec.scanp; i < len(dec.buf); i++ {
493 c := dec.buf[i]
494 if isSpace(c) {
495 continue
496 }
497 dec.scanp = i
498 return c, nil
499 }
500 // buffer has been scanned, now report any error
501 if err != nil {
502 return 0, err
503 }
504 err = dec.refill()
505 }
506}
507
508// InputOffset returns the input stream byte offset of the current decoder position.
509// The offset gives the location of the end of the most recently returned token
510// and the beginning of the next token.
511func (dec *Decoder) InputOffset() int64 {
512 return dec.scanned + int64(dec.scanp)
513}