From d1b5bc1f05624614f43ef85597f4aa9d7a166d23 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 17:40:13 -0700 Subject: parse: Add an example of how the stack works for arrays, add tests --- internal/parse.go | 38 ++++++++++++++++++------ internal/parse_test.go | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 internal/parse_test.go diff --git a/internal/parse.go b/internal/parse.go index bb849e7..121857b 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -268,6 +268,8 @@ type Parser struct { // // Within each element type, the stack item is replaced, not pushed. // + // (Keep each of these examples in-sync with parse_test.go.) + // // For example, given the input string // // {"x":"y","a":"b"} @@ -293,9 +295,34 @@ type Parser struct { // o" {"x":"y","a":"b // o {"x":"y","a":"b" // {"x":"y","a":"b"} + // + // Or, given the input string + // + // ["x","y"] + // + // The stack would be + // + // stack processed + // ? 
+ // [ [ + // a" [" + // a" ["x + // a ["x" + // ] ["x", + // a" ["x"," + // a" ["x","y + // a ["x","y" + // ["x","y"] stack []RuneType } +func (par *Parser) init() { + if !par.initialized { + par.initialized = true + par.pushState(runeTypeAny) + } +} + func (par *Parser) pushState(state RuneType) RuneType { par.stack = append(par.stack, state) return state @@ -311,6 +338,7 @@ func (par *Parser) popState() { } func (par *Parser) stackString() string { + par.init() var buf strings.Builder for _, s := range par.stack { buf.WriteString(s.String()) @@ -351,10 +379,7 @@ func (par *Parser) HandleEOF() (RuneType, error) { if par.err != nil { return RuneTypeError, par.err } - if !par.initialized { - par.initialized = true - par.pushState(runeTypeAny) - } + par.init() switch len(par.stack) { case 0: return RuneTypeEOF, nil @@ -395,10 +420,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { if par.err != nil { return RuneTypeError, par.err } - if !par.initialized { - par.initialized = true - par.pushState(runeTypeAny) - } + par.init() if len(par.stack) == 0 { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: diff --git a/internal/parse_test.go b/internal/parse_test.go new file mode 100644 index 0000000..91cd277 --- /dev/null +++ b/internal/parse_test.go @@ -0,0 +1,78 @@ +// Copyright (C) 2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParserHandleRune(t *testing.T) { + t.Parallel() + type testcase struct { + Input string + ExpStack []string + } + testcases := map[string]testcase{ + // Keep these test-cases in-sync with the examples in parse.go. 
+ "object": { + Input: `{"x":"y","a":"b"}`, + ExpStack: []string{ + // st,// processed + `?`, + `{`, // { + `»"`, // {" + `»"`, // {"x + `»`, // {"x" + `o?`, // {"x": + `o"`, // {"x":" + `o"`, // {"x":"y + `o`, // {"x":"y" + `{`, // {"x":"y", + `»"`, // {"x":"y"," + `»"`, // {"x":"y","a + `»`, // {"x":"y","a" + `o?`, // {"x":"y","a": + `o"`, // {"x":"y","a":" + `o"`, // {"x":"y","a":"b + `o`, // {"x":"y","a":"b" + ``, // {"x":"y","a":"b"} + }, + }, + "array": { + Input: `["x","y"]`, + ExpStack: []string{ + // st,// processed + `?`, + `[`, // [ + `a"`, // [" + `a"`, // ["x + `a`, // ["x" + `]`, // ["x", + `a"`, // ["x"," + `a"`, // ["x","y + `a`, // ["x","y" + ``, // ["x","y"] + }, + }, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var par Parser + if !assert.Equal(t, len(tc.Input)+1, len(tc.ExpStack)) { + return + } + for i, r := range tc.Input { + assert.Equal(t, tc.ExpStack[i], par.stackString()) + _, err := par.HandleRune(r) + assert.NoError(t, err) + assert.Equal(t, tc.ExpStack[i+1], par.stackString()) + } + }) + } +} -- cgit v1.1-4-g5e80 From ff6dc0bc519886905e758a84e572f5e34d6c03d1 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 22:31:32 -0700 Subject: Move things between files --- common.go | 16 ++++ encode.go | 23 ------ encode_escape.go | 103 +++++++++++++++++++++++ encode_string.go | 111 +++++++++++++++++++++++++ internal/encode.go | 14 ++++ internal/export_tags.go | 16 ---- internal/tags.go | 7 ++ ioutil.go | 31 +++++++ misc.go | 211 ------------------------------------------------ struct.go | 4 +- test_export.go | 18 ----- 11 files changed, 285 insertions(+), 269 deletions(-) create mode 100644 common.go create mode 100644 encode_escape.go create mode 100644 encode_string.go create mode 100644 internal/encode.go delete mode 100644 internal/export_tags.go create mode 100644 internal/tags.go create mode 100644 ioutil.go delete mode 100644 misc.go delete mode 100644 
test_export.go diff --git a/common.go b/common.go new file mode 100644 index 0000000..90156b9 --- /dev/null +++ b/common.go @@ -0,0 +1,16 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "encoding/json" + "reflect" +) + +var ( + numberType = reflect.TypeOf(json.Number("")) + byteType = reflect.TypeOf(byte(0)) + byteSliceType = reflect.TypeOf(([]byte)(nil)) +) diff --git a/encode.go b/encode.go index 41032e5..fa337ad 100644 --- a/encode.go +++ b/encode.go @@ -17,7 +17,6 @@ import ( "sort" "strconv" "strings" - "unicode/utf8" "unsafe" ) @@ -426,28 +425,6 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool } } -func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { - encodeWriteByte(w, '"') - for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) - } - } - encodeWriteByte(w, '"') -} - -func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { - encodeWriteByte(w, '"') - for i := 0; i < len(str); { - c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) - } - i += size - } - encodeWriteByte(w, '"') -} - func encodeArray(w io.Writer, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { encodeWriteByte(w, '[') n := val.Len() diff --git a/encode_escape.go b/encode_escape.go new file mode 100644 index 0000000..ab0d9c1 --- /dev/null +++ b/encode_escape.go @@ -0,0 +1,103 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "unicode/utf8" +) + +// BackslashEscapeMode identifies one of the three ways that a +// character may be represented in a JSON string: +// +// - literally (no backslash escaping) +// +// - as a short "well-known" `\X` 
backslash sequence (where `X` is a +// single-character) +// +// - as a long Unicode `\uXXXX` backslash sequence +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// A BackslashEscaper controls how a ReEncoder emits a character in a +// JSON string. The `rune` argument is the character being +// considered, and the `BackslashEscapeMode` argument is how it was +// originally encoded in the input. +// +// The ReEncoder will panic if a BackslashEscaper returns an unknown +// BackslashEscapeMode. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +// EscapePreserve is a BackslashEscaper that preserves the original +// input escaping. +func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + return wasEscaped +} + +// EscapeJSSafe is a BackslashEscaper that escapes strings such that +// the JSON is safe to embed in JS; it otherwise preserves the original +// input escaping. +// +// JSON is notionally a JS subset, but that's not actually true; so +// more conservative backslash-escaping is necessary to safely embed +// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset +func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\u2028', '\u2029': + return BackslashEscapeUnicode + default: + return wasEscaped + } +} + +// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that +// the JSON is safe to embed in HTML; it otherwise preserves the +// original input escaping. +func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '&', '<', '>': + return BackslashEscapeUnicode + default: + return EscapeJSSafe(c, wasEscaped) + } +} + +// EscapeDefault is a BackslashEscaper that mimics the default +// behavior of encoding/json. 
+// +// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX` +// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement +// character. +// +// A ReEncoder uses EscapeDefault if a BackslashEscaper is not +// specified. +func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\b', '\f', utf8.RuneError: + return BackslashEscapeUnicode + default: + return EscapeHTMLSafe(c, wasEscaped) + } +} + +// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the +// default behavior of an encoding/json.Encoder that has had +// SetEscapeHTML(false) called on it. +// +// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX` +// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement +// character. +func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\b', '\f', utf8.RuneError: + return BackslashEscapeUnicode + default: + return EscapeJSSafe(c, wasEscaped) + } +} diff --git a/encode_string.go b/encode_string.go new file mode 100644 index 0000000..c5cb442 --- /dev/null +++ b/encode_string.go @@ -0,0 +1,111 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal" +) + +func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + buf := [6]byte{ + '\\', + 'u', + internal.Hex[(c>>12)&0xf], + internal.Hex[(c>>8)&0xf], + internal.Hex[(c>>4)&0xf], + internal.Hex[(c>>0)&0xf], + } + return w.Write(buf[:]) +} + +func writeStringShortEscape(w io.Writer, c rune) (int, error) { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic("should not happen") + } + buf := [2]byte{'\\', b} + return w.Write(buf[:]) +} + +func writeStringChar(w io.Writer, c rune, wasEscaped 
BackslashEscapeMode, escaper BackslashEscaper) (int, error) { + if escaper == nil { + escaper = EscapeDefault + } + switch escaper(c, wasEscaped) { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + return writeRune(w, c) + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + return writeRune(w, c) + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + return writeRune(w, c) + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { + encodeWriteByte(w, '"') + for _, c := range str { + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + panic(encodeError{err}) + } + } + encodeWriteByte(w, '"') +} + +func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { + encodeWriteByte(w, '"') + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + panic(encodeError{err}) + } + i += size + } + encodeWriteByte(w, '"') +} + +func init() { + internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } +} diff --git a/internal/encode.go b/internal/encode.go new file mode 100644 index 0000000..8aae673 
--- /dev/null +++ b/internal/encode.go @@ -0,0 +1,14 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "io" +) + +var ( + EncodeStringFromBytes func(io.Writer, []byte) + EncodeStringFromString func(io.Writer, string) +) diff --git a/internal/export_tags.go b/internal/export_tags.go deleted file mode 100644 index d8cf622..0000000 --- a/internal/export_tags.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "io" -) - -var ParseTag = parseTag - -var ( - EncodeStringFromBytes func(io.Writer, []byte) - EncodeStringFromString func(io.Writer, string) -) diff --git a/internal/tags.go b/internal/tags.go new file mode 100644 index 0000000..bdf1f72 --- /dev/null +++ b/internal/tags.go @@ -0,0 +1,7 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +var ParseTag = parseTag diff --git a/ioutil.go b/ioutil.go new file mode 100644 index 0000000..a53eac3 --- /dev/null +++ b/ioutil.go @@ -0,0 +1,31 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "io" + "unicode/utf8" +) + +func writeByte(w io.Writer, c byte) error { + if br, ok := w.(interface{ WriteByte(byte) error }); ok { + return br.WriteByte(c) + } + var buf [1]byte + buf[0] = c + if _, err := w.Write(buf[:]); err != nil { + return err + } + return nil +} + +func writeRune(w io.Writer, c rune) (int, error) { + if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { + return rw.WriteRune(c) + } + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], c) + return w.Write(buf[:n]) +} diff --git a/misc.go b/misc.go deleted file mode 100644 index fb96b4e..0000000 --- a/misc.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: 
GPL-2.0-or-later - -package lowmemjson - -import ( - "encoding/json" - "io" - "reflect" - "unicode/utf8" - - "git.lukeshu.com/go/lowmemjson/internal" -) - -var ( - numberType = reflect.TypeOf(json.Number("")) - byteType = reflect.TypeOf(byte(0)) - byteSliceType = reflect.TypeOf(([]byte)(nil)) -) - -// generic I/O ///////////////////////////////////////////////////////////////// - -func writeByte(w io.Writer, c byte) error { - if br, ok := w.(interface{ WriteByte(byte) error }); ok { - return br.WriteByte(c) - } - var buf [1]byte - buf[0] = c - if _, err := w.Write(buf[:]); err != nil { - return err - } - return nil -} - -func writeRune(w io.Writer, c rune) (int, error) { - if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { - return rw.WriteRune(c) - } - var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], c) - return w.Write(buf[:n]) -} - -// JSON string encoding //////////////////////////////////////////////////////// - -// BackslashEscapeMode identifies one of the three ways that a -// character may be represented in a JSON string: -// -// - literally (no backslash escaping) -// -// - as a short "well-known" `\X` backslash sequence (where `X` is a -// single-character) -// -// - as a long Unicode `\uXXXX` backslash sequence -type BackslashEscapeMode uint8 - -const ( - BackslashEscapeNone BackslashEscapeMode = iota - BackslashEscapeShort - BackslashEscapeUnicode -) - -// A BackslashEscaper controls how a ReEncoder emits a character in a -// JSON string. The `rune` argument is the character being -// considered, and the `BackslashEscapeMode` argument is how it was -// originally encoded in the input. -// -// The ReEncoder will panic if a BackslashEscaper returns an unknown -// BackslashEscapeMode. -type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode - -// EscapePreserve is a BackslashEscaper that preserves the original -// input escaping. 
-func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - return wasEscaped -} - -// EscapeJSSafe is a BackslashEscaper that escapes strings such that -// the JSON safe to embed in JS; it otherwise preserves the original -// input escaping. -// -// JSON is notionally a JS subset, but that's not actually true; so -// more conservative backslash-escaping is necessary to safely embed -// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset -func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\u2028', '\u2029': - return BackslashEscapeUnicode - default: - return wasEscaped - } -} - -// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that -// the JSON is safe to embed in HTML; it otherwise preserves the -// original input escaping. -func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '&', '<', '>': - return BackslashEscapeUnicode - default: - return EscapeJSSafe(c, wasEscaped) - } -} - -// EscapeDefault is a BackslashEscaper that mimics the default -// behavior of encoding/json. -// -// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX` -// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement -// character. -// -// A ReEncoder uses EscapeDefault if a BackslashEscaper is not -// specified. -func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\b', '\f', utf8.RuneError: - return BackslashEscapeUnicode - default: - return EscapeHTMLSafe(c, wasEscaped) - } -} - -// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the -// default behavior of an encoding/json.Encoder that has had -// SetEscapeHTML(false) called on it. -// -// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX` -// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement -// character. 
-func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\b', '\f', utf8.RuneError: - return BackslashEscapeUnicode - default: - return EscapeJSSafe(c, wasEscaped) - } -} - -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { - buf := [6]byte{ - '\\', - 'u', - internal.Hex[(c>>12)&0xf], - internal.Hex[(c>>8)&0xf], - internal.Hex[(c>>4)&0xf], - internal.Hex[(c>>0)&0xf], - } - return w.Write(buf[:]) -} - -func writeStringShortEscape(w io.Writer, c rune) (int, error) { - var b byte - switch c { - case '"', '\\', '/': - b = byte(c) - case '\b': - b = 'b' - case '\f': - b = 'f' - case '\n': - b = 'n' - case '\r': - b = 'r' - case '\t': - b = 't' - default: - panic("should not happen") - } - buf := [2]byte{'\\', b} - return w.Write(buf[:]) -} - -func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { - if escaper == nil { - escaper = EscapeDefault - } - switch escaper(c, wasEscaped) { - case BackslashEscapeNone: - switch { - case c < 0x0020: // override, gotta escape these - switch c { - case '\b', '\f', '\n', '\r', '\t': // short-escape if possible - return writeStringShortEscape(w, c) - default: - return writeStringUnicodeEscape(w, c) - } - case c == '"' || c == '\\': // override, gotta escape these - return writeStringShortEscape(w, c) - default: // obey - return writeRune(w, c) - } - case BackslashEscapeShort: - switch c { - case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey - return writeStringShortEscape(w, c) - default: // override, can't short-escape these - return writeRune(w, c) - } - case BackslashEscapeUnicode: - switch { - case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) 
- return writeRune(w, c) - default: // obey - return writeStringUnicodeEscape(w, c) - } - default: - panic("escaper returned an invalid escape mode") - } -} diff --git a/struct.go b/struct.go index 24b2ac0..b7fc287 100644 --- a/struct.go +++ b/struct.go @@ -6,6 +6,8 @@ package lowmemjson import ( "reflect" + + "git.lukeshu.com/go/lowmemjson/internal" ) type structField struct { @@ -143,7 +145,7 @@ func indexStructInner(typ reflect.Type, byPos *[]structField, byName map[string] if tag == "-" { continue } - tagName, opts := parseTag(tag) + tagName, opts := internal.ParseTag(tag) name := tagName if !isValidTag(name) { name = "" diff --git a/test_export.go b/test_export.go deleted file mode 100644 index 76d29d2..0000000 --- a/test_export.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "io" - - "git.lukeshu.com/go/lowmemjson/internal" -) - -func init() { - internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } -} - -var parseTag = internal.ParseTag -- cgit v1.1-4-g5e80 From e87c9b4d8b629f5df19e9dd182162889d279b4f2 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 23:26:26 -0700 Subject: encode: Fix errors for marshalers/encodables with bad output --- encode.go | 18 +++++++-- errors.go | 5 ++- methods_test.go | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 5 deletions(-) diff --git a/encode.go b/encode.go index fa337ad..00848ed 100644 --- a/encode.go +++ b/encode.go @@ -146,7 +146,11 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool }}) } if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) { - panic(encodeError{err}) + panic(encodeError{&EncodeMethodError{ + Type: val.Type(), + SourceFunc: 
"EncodeJSON", + Err: err, + }}) } case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType): @@ -173,10 +177,18 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool // Use a sub-ReEncoder to check that it's a full element. validator := &ReEncoder{Out: w, BackslashEscape: escaper} if _, err := validator.Write(dat); err != nil { - panic(encodeError{err}) + panic(encodeError{&EncodeMethodError{ + Type: val.Type(), + SourceFunc: "MarshalJSON", + Err: err, + }}) } if err := validator.Close(); err != nil { - panic(encodeError{err}) + panic(encodeError{&EncodeMethodError{ + Type: val.Type(), + SourceFunc: "MarshalJSON", + Err: err, + }}) } case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType): diff --git a/errors.go b/errors.go index d36fc83..fe48723 100644 --- a/errors.go +++ b/errors.go @@ -138,8 +138,9 @@ type EncodeTypeError = json.UnsupportedTypeError // } type EncodeValueError = json.UnsupportedValueError -// An EncodeMethodError wraps an error that is returned from an -// object's method when encoding that object to JSON. +// An EncodeMethodError either wraps an error that is returned from an +// object's method when encoding that object to JSON, or wraps a +// *ReEncodeSyntaxError for the method's output. 
type EncodeMethodError struct { Type reflect.Type // The Go type that the method is on SourceFunc string // The method: "EncodeJSON", "MarshalJSON", or "MarshalText" diff --git a/methods_test.go b/methods_test.go index 5e2209a..46e2601 100644 --- a/methods_test.go +++ b/methods_test.go @@ -6,8 +6,10 @@ package lowmemjson_test import ( "bytes" + "errors" "fmt" "io" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -121,3 +123,112 @@ func TestMethods(t *testing.T) { assert.NoError(t, lowmemjson.NewDecoder(&buf).Decode(&out)) assert.Equal(t, in, out) } + +type strEncoder string + +func (s strEncoder) EncodeJSON(w io.Writer) error { + _, err := io.WriteString(w, string(s)) + return err +} + +type strMarshaler string + +func (s strMarshaler) MarshalJSON() ([]byte, error) { + return []byte(s), nil +} + +type strTextMarshaler struct { + str string + err string +} + +func (m strTextMarshaler) MarshalText() (txt []byte, err error) { + if len(m.str) > 0 { + txt = []byte(m.str) + } + if len(m.err) > 0 { + err = errors.New(m.err) + } + return +} + +func TestMethodsEncode(t *testing.T) { + t.Parallel() + type testcase struct { + In string + ExpectedErr string + } + testcases := map[string]testcase{ + "basic": {In: `{}`}, + "empty": {In: ``, ExpectedErr: `syntax error at input byte 0: EOF`}, + "short": {In: `{`, ExpectedErr: `syntax error at input byte 1: unexpected EOF`}, + "long": {In: `{}{}`, ExpectedErr: `syntax error at input byte 2: invalid character '{' after top-level value`}, + } + t.Run("encodable", func(t *testing.T) { + t.Parallel() + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var buf strings.Builder + err := lowmemjson.NewEncoder(&buf).Encode([]any{strEncoder(tc.In)}) + if tc.ExpectedErr == "" { + assert.NoError(t, err) + assert.Equal(t, "["+tc.In+"]", buf.String()) + } else { + assert.EqualError(t, err, + `json: error calling EncodeJSON for type lowmemjson_test.strEncoder: `+ + tc.ExpectedErr) + 
} + }) + } + }) + t.Run("marshaler", func(t *testing.T) { + t.Parallel() + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var buf strings.Builder + err := lowmemjson.NewEncoder(&buf).Encode([]any{strMarshaler(tc.In)}) + if tc.ExpectedErr == "" { + assert.NoError(t, err) + assert.Equal(t, "["+tc.In+"]", buf.String()) + } else { + assert.EqualError(t, err, + `json: error calling MarshalJSON for type lowmemjson_test.strMarshaler: `+ + tc.ExpectedErr) + } + }) + } + }) + t.Run("text", func(t *testing.T) { + t.Parallel() + type testcase struct { + Str string + Err string + } + testcases := map[string]testcase{ + "basic": {Str: `a`}, + "err": {Err: `xxx`}, + "both": {Str: `a`, Err: `xxx`}, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var buf strings.Builder + err := lowmemjson.NewEncoder(&buf).Encode([]any{strTextMarshaler{str: tc.Str, err: tc.Err}}) + if tc.Err == "" { + assert.NoError(t, err) + assert.Equal(t, `["`+tc.Str+`"]`, buf.String()) + } else { + assert.EqualError(t, err, + `json: error calling MarshalText for type lowmemjson_test.strTextMarshaler: `+ + tc.Err) + assert.Equal(t, "[", buf.String()) + } + }) + } + }) +} -- cgit v1.1-4-g5e80 From ff05997fb3b956df05d9f89b34f95cc97775a615 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 21:14:55 -0700 Subject: Add a .editorconfig file --- .editorconfig | 27 +++++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..85ac8bf --- /dev/null +++ b/.editorconfig @@ -0,0 +1,27 @@ +root = true + +[*] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.go] +indent_style = tab + +[{go.mod,go.sum}] +indent_style = tab + +[Makefile] +; If somehow this gets set to not-tab, then the resulting Makefile 
+; won't work. +indent_style = tab + +[*.md] +; Emacs markdown-mode gets mixed tabs/spaces wrong, and so I have zero +; faith that any other tool gets it right. +indent_style = space + +[*.yml] +indent_style = space +indent_size = 2 diff --git a/README.md b/README.md index 955258c..fcb46fa 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ common use of it will be ```go lowmemjson.NewEncoder(&lowmemjson.ReEncoder{ - Out: out, + Out: out, // settings here }).Encode(val) ``` -- cgit v1.1-4-g5e80 From db49f2d6001fff8e3e417dc76fe9222ca9cbf862 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 18:50:31 -0700 Subject: Add a ReleaseNotes.md file --- ReleaseNotes.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 ReleaseNotes.md diff --git a/ReleaseNotes.md b/ReleaseNotes.md new file mode 100644 index 0000000..f19ce68 --- /dev/null +++ b/ReleaseNotes.md @@ -0,0 +1,32 @@ +# v0.2.1 (TBD) + + Theme: Code quality + + This release improves code quality; getting various linters to pass, + adding tests (and a few bug-fixes), refactoring things to be + clearer, fixing some mistakes in the documentation. + + User-facing changes: + + - Encoder: `*EncodeMethodError` is now also used when a method + produces invalid JSON. + +# v0.2.0 (2023-01-26) + + Theme: Add documentation + + This release doesn't make any major changes, and is just adding + documentation. I have removed a few minor things that I didn't want + to write documentation for. + + Breaking changes: + + - Drop the following shorthand functions: + + `func Decode(r io.RuneScanner, ptr any) error { return NewDecoder(r).Decode(ptr) }` + + `func DecodeThenEOF(r io.RuneScanner, ptr any) error { return NewDecoder(r).DecodeThenEOF(ptr) }` + + `func Encode(w io.Writer, obj any) (err error) { return NewEncoder(w).Encode(obj) }` + - Drop `const Tab = "\t"`. 
+ +# v0.1.0 (2022-09-19) + + Theme: Initial release -- cgit v1.1-4-g5e80 From 8b7c8d2f87f9c4d924d070926fb5ab9860d00c61 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 14:29:27 -0700 Subject: decode: s/stack/structStack/ This should make the next commit less noisy. --- decode.go | 74 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/decode.go b/decode.go index f351037..3c51c4b 100644 --- a/decode.go +++ b/decode.go @@ -90,8 +90,8 @@ type Decoder struct { useNumber bool // state - err error - stack []decodeStackItem + err error + structStack []decodeStackItem } const maxNestingDepth = 10000 @@ -150,40 +150,40 @@ func (dec *Decoder) More() bool { return e == nil && t != internal.RuneTypeEOF } -func (dec *Decoder) stackPush(par reflect.Type, idx any) { - dec.stack = append(dec.stack, decodeStackItem{par, idx}) +func (dec *Decoder) structStackPush(par reflect.Type, idx any) { + dec.structStack = append(dec.structStack, decodeStackItem{par, idx}) } -func (dec *Decoder) stackPop() { - dec.stack = dec.stack[:len(dec.stack)-1] +func (dec *Decoder) structStackPop() { + dec.structStack = dec.structStack[:len(dec.structStack)-1] } -func (dec *Decoder) stackStr() string { +func (dec *Decoder) structStackStr() string { var buf strings.Builder buf.WriteString("v") - for _, item := range dec.stack { + for _, item := range dec.structStack { fmt.Fprintf(&buf, "[%#v]", item.idx) } return buf.String() } -func (dec *Decoder) stackParent() string { - last := len(dec.stack) - 1 - if last > 0 && dec.stack[last].par.Kind() != reflect.Struct && dec.stack[last-1].par.Kind() == reflect.Struct { +func (dec *Decoder) structStackParent() string { + last := len(dec.structStack) - 1 + if last > 0 && dec.structStack[last].par.Kind() != reflect.Struct && dec.structStack[last-1].par.Kind() == reflect.Struct { last-- } - if last >= 0 && dec.stack[last].par.Kind() == reflect.Struct { - return 
dec.stack[last].par.Name() + if last >= 0 && dec.structStack[last].par.Kind() == reflect.Struct { + return dec.structStack[last].par.Name() } return "" } -func (dec *Decoder) stackName() string { - if dec.stackParent() == "" { +func (dec *Decoder) structStackName() string { + if dec.structStackParent() == "" { return "" } var fields []string - for _, elem := range dec.stack { + for _, elem := range dec.structStack { if elem.par.Kind() == reflect.Struct { fields = append(fields, elem.idx.(string)) } @@ -259,9 +259,9 @@ type decodeError DecodeError func (dec *Decoder) panicType(jTyp string, gTyp reflect.Type, err error) { panic(decodeError{ - Field: dec.stackStr(), - FieldParent: dec.stackParent(), - FieldName: dec.stackName(), + Field: dec.structStackStr(), + FieldParent: dec.structStackParent(), + FieldName: dec.structStackName(), Err: &DecodeTypeError{ GoType: gTyp, JSONType: jTyp, @@ -275,9 +275,9 @@ func (dec *Decoder) readRune() (rune, internal.RuneType) { c, _, t, e := dec.io.ReadRuneType() if e != nil { panic(decodeError{ - Field: dec.stackStr(), - FieldParent: dec.stackParent(), - FieldName: dec.stackName(), + Field: dec.structStackStr(), + FieldParent: dec.structStackParent(), + FieldName: dec.structStackName(), Err: e, }) } @@ -320,9 +320,9 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, erro c, s, t, e := sc.dec.io.ReadRuneType() if e != nil { panic(decodeError{ - Field: sc.dec.stackStr(), - FieldParent: sc.dec.stackParent(), - FieldName: sc.dec.stackName(), + Field: sc.dec.structStackStr(), + FieldParent: sc.dec.structStackParent(), + FieldName: sc.dec.structStackName(), Err: e, }) } @@ -534,8 +534,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.decodeString(nil, &nameBuf) }, func() { name := nameBuf.String() - dec.stackPush(typ, name) - defer dec.stackPop() + dec.structStackPush(typ, name) + defer dec.structStackPop() idx, ok := index.byName[name] if !ok { for oidx := range index.byPos { @@ -645,8 
+645,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.panicType("object", typ, &DecodeArgumentError{Type: nameValTyp}) } } - dec.stackPush(typ, nameValPtr.Elem()) - defer dec.stackPop() + dec.structStackPush(typ, nameValPtr.Elem()) + defer dec.structStackPop() fValPtr := reflect.New(typ.Elem()) dec.decode(fValPtr.Elem(), false) @@ -699,8 +699,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } i := 0 dec.decodeArray(typ, func() { - dec.stackPush(typ, i) - defer dec.stackPop() + dec.structStackPush(typ, i) + defer dec.structStackPop() mValPtr := reflect.New(typ.Elem()) dec.decode(mValPtr.Elem(), false) val.Set(reflect.Append(val, mValPtr.Elem())) @@ -718,8 +718,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { i := 0 n := val.Len() dec.decodeArray(typ, func() { - dec.stackPush(typ, i) - defer dec.stackPop() + dec.structStackPush(typ, i) + defer dec.structStackPop() if i < n { mValPtr := reflect.New(typ.Elem()) dec.decode(mValPtr.Elem(), false) @@ -780,8 +780,8 @@ func (dec *Decoder) decodeAny() any { dec.decodeString(nil, &nameBuf) }, func() { name := nameBuf.String() - dec.stackPush(typ, name) - defer dec.stackPop() + dec.structStackPush(typ, name) + defer dec.structStackPop() ret[name] = dec.decodeAny() }) return ret @@ -789,8 +789,8 @@ func (dec *Decoder) decodeAny() any { ret := []any{} typ := reflect.TypeOf(ret) dec.decodeArray(typ, func() { - dec.stackPush(typ, len(ret)) - defer dec.stackPop() + dec.structStackPush(typ, len(ret)) + defer dec.structStackPop() ret = append(ret, dec.decodeAny()) }) return ret -- cgit v1.1-4-g5e80 From c24b34a47359ffb012b85e329f829b64d9d27215 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 12:31:42 -0700 Subject: decode: Fix DecodeTypeError offsets --- ReleaseNotes.md | 3 ++ compat/json/compat.go | 2 +- decode.go | 56 ++++++++++++++++++++++++++++----- decode_scan.go | 4 +-- decode_scan_test.go | 6 ++-- methods_test.go | 87 
+++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 14 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index f19ce68..a2365f0 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -10,6 +10,9 @@ - Encoder: `*EncodeMethodError` is now also used when a method produces invalid JSON. + - Decoder: The offset in `*DecodeTypeError`s now correctly points to + the start of the value, rather than somewhere in the middle of + it. # v0.2.0 (2023-01-26) diff --git a/compat/json/compat.go b/compat/json/compat.go index 0c9e800..48d708b 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -183,7 +183,7 @@ func convertDecodeError(err error) error { default: err = &SyntaxError{ msg: terr.Err.Error(), - Offset: terr.Offset, + Offset: terr.Offset + 1, } } case *lowmemjson.DecodeTypeError: diff --git a/decode.go b/decode.go index 3c51c4b..a7536f5 100644 --- a/decode.go +++ b/decode.go @@ -91,6 +91,7 @@ type Decoder struct { // state err error + posStack []int64 structStack []decodeStackItem } @@ -150,6 +151,14 @@ func (dec *Decoder) More() bool { return e == nil && t != internal.RuneTypeEOF } +func (dec *Decoder) posStackPush() { + dec.posStack = append(dec.posStack, dec.InputOffset()) +} + +func (dec *Decoder) posStackPop() { + dec.posStack = dec.posStack[:len(dec.posStack)-1] +} + func (dec *Decoder) structStackPush(par reflect.Type, idx any) { dec.structStack = append(dec.structStack, decodeStackItem{par, idx}) } @@ -266,7 +275,7 @@ func (dec *Decoder) panicType(jTyp string, gTyp reflect.Type, err error) { GoType: gTyp, JSONType: jTyp, Err: err, - Offset: dec.InputOffset(), + Offset: dec.posStack[len(dec.posStack)-1], }, }) } @@ -381,6 +390,8 @@ var kind2bits = map[reflect.Kind]int{ } func (dec *Decoder) decode(val reflect.Value, nullOK bool) { + dec.posStackPush() + defer dec.posStackPop() typ := val.Type() switch { case val.CanAddr() && reflect.PointerTo(typ) == rawMessagePtrType: @@ -388,17 +399,17 @@ func (dec *Decoder) 
decode(val reflect.Value, nullOK bool) { var buf bytes.Buffer dec.scan(&buf) if err := val.Addr().Interface().(*json.RawMessage).UnmarshalJSON(buf.Bytes()); err != nil { - dec.panicType(t.JSONType(), typ, err) + dec.panicType(t.JSONType(), reflect.PointerTo(typ), err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(decodableType): t := dec.peekRuneType() obj := val.Addr().Interface().(Decodable) l := dec.limitingScanner() if err := obj.DecodeJSON(l); err != nil { - dec.panicType(t.JSONType(), typ, err) + dec.panicType(t.JSONType(), reflect.PointerTo(typ), err) } if _, _, err := l.ReadRune(); err != io.EOF { - dec.panicType(t.JSONType(), typ, fmt.Errorf("did not consume entire %s", t.JSONType())) + dec.panicType(t.JSONType(), reflect.PointerTo(typ), fmt.Errorf("did not consume entire %s", t.JSONType())) } case val.CanAddr() && reflect.PointerTo(typ).Implements(jsonUnmarshalerType): t := dec.peekRuneType() @@ -406,7 +417,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.scan(&buf) obj := val.Addr().Interface().(json.Unmarshaler) if err := obj.UnmarshalJSON(buf.Bytes()); err != nil { - dec.panicType(t.JSONType(), typ, err) + dec.panicType(t.JSONType(), reflect.PointerTo(typ), err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(textUnmarshalerType): if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { @@ -530,9 +541,13 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { index := indexStruct(typ) var nameBuf strings.Builder dec.decodeObject(typ, func() { + dec.posStackPush() + defer dec.posStackPop() nameBuf.Reset() dec.decodeString(nil, &nameBuf) }, func() { + dec.posStackPush() + defer dec.posStackPop() name := nameBuf.String() dec.structStackPush(typ, name) defer dec.structStackPop() @@ -613,17 +628,19 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { val.Set(reflect.MakeMap(typ)) } var nameBuf bytes.Buffer + var nameValPtr reflect.Value dec.decodeObject(typ, func() { + dec.posStackPush() + 
defer dec.posStackPop() nameBuf.Reset() dec.decodeString(nil, &nameBuf) - }, func() { nameValTyp := typ.Key() - nameValPtr := reflect.New(nameValTyp) + nameValPtr = reflect.New(nameValTyp) switch { case reflect.PointerTo(nameValTyp).Implements(textUnmarshalerType): obj := nameValPtr.Interface().(encoding.TextUnmarshaler) if err := obj.UnmarshalText(nameBuf.Bytes()); err != nil { - dec.panicType("string", nameValTyp, err) + dec.panicType("string", reflect.PointerTo(nameValTyp), err) } default: switch nameValTyp.Kind() { @@ -645,6 +662,9 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.panicType("object", typ, &DecodeArgumentError{Type: nameValTyp}) } } + }, func() { + dec.posStackPush() + defer dec.posStackPop() dec.structStackPush(typ, nameValPtr.Elem()) defer dec.structStackPop() @@ -699,6 +719,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } i := 0 dec.decodeArray(typ, func() { + dec.posStackPush() + defer dec.posStackPop() dec.structStackPush(typ, i) defer dec.structStackPop() mValPtr := reflect.New(typ.Elem()) @@ -718,6 +740,8 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { i := 0 n := val.Len() dec.decodeArray(typ, func() { + dec.posStackPush() + defer dec.posStackPop() dec.structStackPush(typ, i) defer dec.structStackPop() if i < n { @@ -776,9 +800,13 @@ func (dec *Decoder) decodeAny() any { typ := reflect.TypeOf(ret) var nameBuf strings.Builder dec.decodeObject(typ, func() { + dec.posStackPush() + defer dec.posStackPop() nameBuf.Reset() dec.decodeString(nil, &nameBuf) }, func() { + dec.posStackPush() + defer dec.posStackPop() name := nameBuf.String() dec.structStackPush(typ, name) defer dec.structStackPop() @@ -789,6 +817,8 @@ func (dec *Decoder) decodeAny() any { ret := []any{} typ := reflect.TypeOf(ret) dec.decodeArray(typ, func() { + dec.posStackPush() + defer dec.posStackPop() dec.structStackPush(typ, len(ret)) defer dec.structStackPop() ret = append(ret, dec.decodeAny()) @@ -840,8 +870,12 @@ func 
DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er } }() dec := NewDecoder(r) + dec.posStackPush() + defer dec.posStackPop() dec.decodeObject(nil, func() { + dec.posStackPush() + defer dec.posStackPop() l := dec.limitingScanner() if err := decodeKey(l); err != nil { dec.panicType("string", nil, err) @@ -851,6 +885,8 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er } }, func() { + dec.posStackPush() + defer dec.posStackPop() t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeVal(l); err != nil { @@ -910,7 +946,11 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e } }() dec := NewDecoder(r) + dec.posStackPush() + defer dec.posStackPop() dec.decodeArray(nil, func() { + dec.posStackPush() + defer dec.posStackPop() t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeMember(l); err != nil { diff --git a/decode_scan.go b/decode_scan.go index 249975d..b9a5ea8 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -62,7 +62,7 @@ func (sc *runeTypeScannerImpl) Reset() { sc.rType, err = sc.parser.HandleRune(sc.rRune) if err != nil { sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset, + Offset: sc.offset - int64(sc.rSize), Err: err, } } else { @@ -89,7 +89,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err sc.rType, err = sc.parser.HandleRune(sc.rRune) if err != nil { sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset, + Offset: sc.offset - int64(sc.rSize), Err: err, } } else { diff --git a/decode_scan_test.go b/decode_scan_test.go index f5ceee0..6a430ab 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -141,9 +141,9 @@ func TestRuneTypeScanner(t *testing.T) { {'[', 1, internal.RuneTypeArrayBeg, nil}, {'0', 1, internal.RuneTypeNumberIntZero, nil}, {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning 
of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, diff --git a/methods_test.go b/methods_test.go index 46e2601..f5d5a9a 100644 --- a/methods_test.go +++ b/methods_test.go @@ -232,3 +232,90 @@ func TestMethodsEncode(t *testing.T) { } }) } + +type tstDecoder struct { + n int + err string +} + +func (d *tstDecoder) DecodeJSON(r io.RuneScanner) error { + for i := 0; i < d.n; i++ { + if _, _, err := r.ReadRune(); err != nil { + if err == io.EOF { + break + } + return err + } + } + if len(d.err) > 0 { + return errors.New(d.err) + } + return nil +} + +type strUnmarshaler struct { + err string +} + +func (u *strUnmarshaler) UnmarshalJSON([]byte) error { + if u.err == "" { + return nil + } + return errors.New(u.err) +} + +type textUnmarshaler struct { + err string +} + +func (u *textUnmarshaler) UnmarshalText([]byte) error { + if u.err == "" { + return nil + } + return errors.New(u.err) +} + +type errTextUnmarshaler struct { + S string +} + +func (u *errTextUnmarshaler) UnmarshalText(dat []byte) error { + u.S = string(dat) + return errors.New("eee") +} + +func TestMethodsDecode(t *testing.T) { + t.Parallel() + type testcase struct { + In string + Obj any + ExpectedErr string + } + testcases := 
map[string]testcase{ + "decode-basic": {In: `{}`, Obj: &tstDecoder{n: 2}}, + "decode-basic-eof": {In: `{}`, Obj: &tstDecoder{n: 5}}, + "decode-syntax-error": {In: `{x}`, Obj: &tstDecoder{n: 5}, ExpectedErr: `json: v: syntax error at input byte 1: object: unexpected character: 'x'`}, + "unmarshal-syntax-error": {In: `{x}`, Obj: &strUnmarshaler{}, ExpectedErr: `json: v: syntax error at input byte 1: object: unexpected character: 'x'`}, + "decode-short": {In: `{}`, Obj: &tstDecoder{n: 1}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: did not consume entire object`}, + "decode-err": {In: `{}`, Obj: &tstDecoder{err: "xxx"}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: xxx`}, + "decode-err2": {In: `{}`, Obj: &tstDecoder{n: 1, err: "yyy"}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: yyy`}, + "unmarshal-err": {In: `{}`, Obj: &strUnmarshaler{err: "zzz"}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.strUnmarshaler: zzz`}, + "unmarshaltext": {In: `""`, Obj: &textUnmarshaler{}}, + "unmarshaltext-nonstr": {In: `{}`, Obj: &textUnmarshaler{}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.textUnmarshaler`}, + "unmarshaltext-err": {In: `""`, Obj: &textUnmarshaler{err: "zzz"}, ExpectedErr: `json: v: cannot decode JSON string at input byte 0 into Go *lowmemjson_test.textUnmarshaler: zzz`}, + "unmarshaltext-mapkey": {In: `{"a":1}`, Obj: new(map[errTextUnmarshaler]int), ExpectedErr: `json: v: cannot decode JSON string at input byte 1 into Go *lowmemjson_test.errTextUnmarshaler: eee`}, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + obj := tc.Obj + err := lowmemjson.NewDecoder(strings.NewReader(tc.In)).Decode(&obj) + if tc.ExpectedErr == "" { + assert.NoError(t, err) + } 
else { + assert.EqualError(t, err, tc.ExpectedErr) + } + }) + } +} -- cgit v1.1-4-g5e80 From 75a59f2b56982bc753d594a5af375b23ef786fdf Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 16:54:15 -0700 Subject: decode: Tidy up DecodeObject and DecodeArray --- decode.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/decode.go b/decode.go index a7536f5..7ae723c 100644 --- a/decode.go +++ b/decode.go @@ -878,9 +878,11 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er defer dec.posStackPop() l := dec.limitingScanner() if err := decodeKey(l); err != nil { + // TODO: Find a better Go type to use than `nil`. dec.panicType("string", nil, err) } if _, _, err := l.ReadRune(); err != io.EOF { + // TODO: Find a better Go type to use than `nil`. dec.panicType("string", nil, fmt.Errorf("did not consume entire string")) } }, @@ -890,13 +892,15 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeVal(l); err != nil { + // TODO: Find a better Go type to use than `nil`. dec.panicType(t.JSONType(), nil, err) } if _, _, err := l.ReadRune(); err != io.EOF { + // TODO: Find a better Go type to use than `nil`. dec.panicType(t.JSONType(), nil, fmt.Errorf("did not consume entire %s", t.JSONType())) } }) - return err + return nil } func (dec *Decoder) decodeObject(gTyp reflect.Type, decodeKey, decodeVal func()) { @@ -954,13 +958,15 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeMember(l); err != nil { + // TODO: Find a better Go type to use than `nil`. dec.panicType(t.JSONType(), nil, err) } if _, _, err := l.ReadRune(); err != io.EOF { + // TODO: Find a better Go type to use than `nil`. 
dec.panicType(t.JSONType(), nil, fmt.Errorf("did not consume entire %s", t.JSONType())) } }) - return + return nil } func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { -- cgit v1.1-4-g5e80 From 7de3be7d772ab32adb1a865450ba60567367064c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 17:27:14 -0700 Subject: parse: Simplify the stack states for arrays We already have a wildcard, no need to invent a new state. --- internal/parse.go | 14 ++------------ internal/parse_test.go | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/internal/parse.go b/internal/parse.go index 121857b..b11aae6 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -264,7 +264,6 @@ type Parser struct { // // [ array: waiting for item to start or ']' // a array: reading item / waiting for ',' or ']' - // ] array: waiting for item to start // // Within each element type, the stack item is replaced, not pushed. // @@ -308,7 +307,7 @@ type Parser struct { // a" [" // a" ["x // a ["x" - // ] ["x", + // a? 
["x", // a" ["x"," // a" ["x","y // a ["x","y" @@ -513,21 +512,12 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { par.pushState(runeTypeAny) return par.HandleRune(c) } - case RuneTypeArrayEnd: // waiting for item - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - par.replaceState(RuneTypeArrayComma) - par.pushState(runeTypeAny) - return par.HandleRune(c) - } case RuneTypeArrayComma: // waiting for ',' or ']' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ',': - par.replaceState(RuneTypeArrayEnd) + par.pushState(runeTypeAny) return RuneTypeArrayComma, nil case ']': par.popState() diff --git a/internal/parse_test.go b/internal/parse_test.go index 91cd277..34977fb 100644 --- a/internal/parse_test.go +++ b/internal/parse_test.go @@ -51,7 +51,7 @@ func TestParserHandleRune(t *testing.T) { `a"`, // [" `a"`, // ["x `a`, // ["x" - `]`, // ["x", + `a?`, // ["x", `a"`, // ["x"," `a"`, // ["x","y `a`, // ["x","y" -- cgit v1.1-4-g5e80 From 2e48a42fb9b9e946958810cfbb90ae85bee997e4 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 20:46:07 -0700 Subject: decode: Remove unnecessary scanner struct members --- decode_scan.go | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index b9a5ea8..387fcea 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -35,28 +35,24 @@ type runeTypeScannerImpl struct { inner io.RuneScanner // initialized by constructor - initialized bool - parser internal.Parser // initialized by constructor offset int64 - repeat bool - stuck bool - rRune rune - rRuneOK bool - rSize int - rType internal.RuneType - rErr error + initialized bool + repeat bool + + rRune rune + rSize int + rType internal.RuneType + rErr error } var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) func (sc *runeTypeScannerImpl) Reset() { sc.parser.Reset() - unread := sc.stuck && sc.rType == 
internal.RuneTypeEOF && sc.rRuneOK - sc.stuck = false - sc.repeat = false - if unread { + if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + sc.repeat = false // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -75,15 +71,15 @@ func (sc *runeTypeScannerImpl) Reset() { func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { switch { - case sc.stuck: + case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): // do nothing case sc.repeat: _, _, _ = sc.inner.ReadRune() default: + sc.initialized = true var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() sc.offset += int64(sc.rSize) - sc.rRuneOK = err == nil switch err { case nil: sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -113,9 +109,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err } } } - sc.initialized = true sc.repeat = false - sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError return sc.rRune, sc.rSize, sc.rType, sc.rErr } @@ -137,7 +131,7 @@ func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { // unread, or if that call returned a rune with size 0, then // ErrInvalidUnreadRune is returned. Otherwise, nil is returned. func (sc *runeTypeScannerImpl) UnreadRune() error { - if !sc.initialized || sc.repeat || sc.rSize == 0 { + if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } sc.repeat = true -- cgit v1.1-4-g5e80