From 8aa12d3cb043859229810947da6c52e600d34b55 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 13:59:35 -0700 Subject: struct.go: Cache structIndexes This should help save some CPU time and avoid some memory churn. --- go.mod | 5 ++++- go.sum | 2 ++ struct.go | 14 +++++++++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 74386a3..452d2ff 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module git.lukeshu.com/go/lowmemjson go 1.18 -require github.com/stretchr/testify v1.8.0 +require ( + git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e + github.com/stretchr/testify v1.8.0 +) require ( github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/go.sum b/go.sum index 5164829..76cf271 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e h1:ZAzzElMx7aMgJXC9QXOxIPyoZrWxX00eP2sR4UHYP+4= +git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e/go.mod h1:EAn7NcfoGeGMv3DWxKQnifcT/rYPAIEqp9Rsz//oYqY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/struct.go b/struct.go index b7fc287..8a664c6 100644 --- a/struct.go +++ b/struct.go @@ -7,6 +7,8 @@ package lowmemjson import ( "reflect" + "git.lukeshu.com/go/typedsync" + "git.lukeshu.com/go/lowmemjson/internal" ) @@ -25,9 +27,19 @@ type structIndex struct { byName map[string]int } +var structIndexCache typedsync.CacheMap[reflect.Type, structIndex] + // indexStruct takes a struct Type, and indexes its fields for use by -// Decoder.Decode() and Encoder.Encode(). +// Decoder.Decode() and Encoder.Encode(). indexStruct caches its +// results. 
func indexStruct(typ reflect.Type) structIndex { + ret, _ := structIndexCache.LoadOrCompute(typ, indexStructReal) + return ret +} + +// indexStructReal is like indexStruct, but is the real indexer, +// bypassing the cache. +func indexStructReal(typ reflect.Type) structIndex { var byPos []structField byName := make(map[string][]int) -- cgit v1.2.3-2-g168b From 2828fa21c0ffd2a32a108b37c0417b01abc42929 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 21:02:56 -0700 Subject: Avoid doing type switching in inner functions The CPU profiler tells me that the encoder is spending a lot of time on type switches. --- decode.go | 40 ++++++------ encode.go | 14 ++-- encode_string.go | 16 ++--- internal/allwriter.go | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++ internal/base64.go | 9 ++- ioutil.go | 31 --------- reencode.go | 49 ++++++++++++-- 7 files changed, 261 insertions(+), 72 deletions(-) create mode 100644 internal/allwriter.go delete mode 100644 ioutil.go diff --git a/decode.go b/decode.go index 7ae723c..91be865 100644 --- a/decode.go +++ b/decode.go @@ -565,7 +565,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { if dec.disallowUnknownFields { dec.panicType("", typ, fmt.Errorf("json: unknown field %q", name)) } - dec.scan(io.Discard) + dec.scan(internal.Discard) return } field := index.byPos[idx] @@ -749,7 +749,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.decode(mValPtr.Elem(), false) val.Index(i).Set(mValPtr.Elem()) } else { - dec.scan(io.Discard) + dec.scan(internal.Discard) } i++ }) @@ -773,18 +773,18 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } -func (dec *Decoder) scan(out io.Writer) { +func (dec *Decoder) scan(out internal.RuneWriter) { limiter := dec.limitingScanner() for { c, _, err := limiter.ReadRune() if err == io.EOF { return } - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) } } -func (dec *Decoder) scanNumber(gTyp reflect.Type, out io.Writer) { +func (dec 
*Decoder) scanNumber(gTyp reflect.Type, out internal.RuneWriter) { if t := dec.peekRuneType(); !t.IsNumber() { dec.panicType(t.JSONType(), gTyp, nil) } @@ -991,34 +991,34 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { } } -func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { +func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { dec.expectRuneType('"', internal.RuneTypeStringBeg, gTyp) var uhex [4]byte for { c, t := dec.readRune() switch t { case internal.RuneTypeStringChar: - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: // do nothing case internal.RuneTypeStringEsc1: switch c { case '"': - _, _ = writeRune(out, '"') + _, _ = out.WriteRune('"') case '\\': - _, _ = writeRune(out, '\\') + _, _ = out.WriteRune('\\') case '/': - _, _ = writeRune(out, '/') + _, _ = out.WriteRune('/') case 'b': - _, _ = writeRune(out, '\b') + _, _ = out.WriteRune('\b') case 'f': - _, _ = writeRune(out, '\f') + _, _ = out.WriteRune('\f') case 'n': - _, _ = writeRune(out, '\n') + _, _ = out.WriteRune('\n') case 'r': - _, _ = writeRune(out, '\r') + _, _ = out.WriteRune('\r') case 't': - _, _ = writeRune(out, '\t') + _, _ = out.WriteRune('\t') default: panic("should not happen") } @@ -1038,12 +1038,12 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { handleUnicode: if utf16.IsSurrogate(c) { if dec.peekRuneType() != internal.RuneTypeStringEsc { - _, _ = writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) break } dec.expectRune('\\', internal.RuneTypeStringEsc) if dec.peekRuneType() != internal.RuneTypeStringEscU { - _, _ = writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) break } dec.expectRune('u', internal.RuneTypeStringEscU) @@ -1063,13 +1063,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { rune(uhex[3])<<0 d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { - _, _ = 
writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) c = c2 goto handleUnicode } - _, _ = writeRune(out, d) + _, _ = out.WriteRune(d) } else { - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) } case internal.RuneTypeStringEnd: return diff --git a/encode.go b/encode.go index e9c7ac6..c5a29b3 100644 --- a/encode.go +++ b/encode.go @@ -18,6 +18,8 @@ import ( "strconv" "strings" "unsafe" + + "git.lukeshu.com/go/lowmemjson/internal" ) // Encodable is the interface implemented by types that can encode @@ -34,14 +36,14 @@ type encodeError struct { Err error } -func encodeWriteByte(w io.Writer, b byte) { - if err := writeByte(w, b); err != nil { +func encodeWriteByte(w io.ByteWriter, b byte) { + if err := w.WriteByte(b); err != nil { panic(encodeError{err}) } } -func encodeWriteString(w io.Writer, str string) { - if _, err := io.WriteString(w, str); err != nil { +func encodeWriteString(w io.StringWriter, str string) { + if _, err := w.WriteString(str); err != nil { panic(encodeError{err}) } } @@ -115,7 +117,7 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { +func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { if !val.IsValid() { encodeWriteString(w, "null") return @@ -436,7 +438,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool } } -func encodeArray(w io.Writer, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { +func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { encodeWriteByte(w, '[') n := val.Len() for i := 0; i < n; i++ { diff --git a/encode_string.go b/encode_string.go index c5cb442..831a038 100644 --- a/encode_string.go +++ b/encode_string.go @@ -45,7 +45,7 @@ func 
writeStringShortEscape(w io.Writer, c rune) (int, error) { return w.Write(buf[:]) } -func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { +func writeStringChar(w internal.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } @@ -62,19 +62,19 @@ func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escape case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey - return writeRune(w, c) + return w.WriteRune(c) } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return writeRune(w, c) + return w.WriteRune(c) } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return writeRune(w, c) + return w.WriteRune(c) default: // obey return writeStringUnicodeEscape(w, c) } @@ -83,7 +83,7 @@ func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escape } } -func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { +func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str string) { encodeWriteByte(w, '"') for _, c := range str { if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { @@ -93,7 +93,7 @@ func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { encodeWriteByte(w, '"') } -func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { +func encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str []byte) { encodeWriteByte(w, '"') for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) @@ -106,6 +106,6 @@ func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { } func 
init() { - internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } + internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(internal.NewAllWriter(w), nil, s) } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(internal.NewAllWriter(w), nil, s) } } diff --git a/internal/allwriter.go b/internal/allwriter.go new file mode 100644 index 0000000..187aa8e --- /dev/null +++ b/internal/allwriter.go @@ -0,0 +1,174 @@ +// Copyright (C) 2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "io" + "unicode/utf8" +) + +// interfaces ///////////////////////////////////////////////////////////////// + +type RuneWriter interface { + WriteRune(rune) (int, error) +} + +// An AllWriter is the union of several common writer interfaces. +type AllWriter interface { + io.Writer + io.ByteWriter + RuneWriter + io.StringWriter +} + +// implementations //////////////////////////////////////////////////////////// + +func WriteByte(w io.Writer, b byte) error { + var buf [1]byte + buf[0] = b + _, err := w.Write(buf[:]) + return err +} + +func WriteRune(w io.Writer, r rune) (int, error) { + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], r) + return w.Write(buf[:n]) +} + +func WriteString(w io.Writer, s string) (int, error) { + return w.Write([]byte(s)) +} + +// wrappers /////////////////////////////////////////////////////////////////// + +// NNN + +type ( + writerNNN interface{ io.Writer } + writerNNNWrapper struct{ writerNNN } +) + +func (w writerNNNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerNNNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NNY + +type ( + writerNNY interface { + 
io.Writer + io.StringWriter + } + writerNNYWrapper struct{ writerNNY } +) + +func (w writerNNYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// NYN + +type ( + writerNYN interface { + io.Writer + RuneWriter + } + writerNYNWrapper struct{ writerNYN } +) + +func (w writerNYNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NYY + +type ( + writerNYY interface { + io.Writer + RuneWriter + io.StringWriter + } + writerNYYWrapper struct{ writerNYY } +) + +func (w writerNYYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } + +// YNN + +type ( + writerYNN interface { + io.Writer + io.ByteWriter + } + writerYNNWrapper struct{ writerYNN } +) + +func (w writerYNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerYNNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// YNY + +type ( + writerYNY interface { + io.Writer + io.ByteWriter + io.StringWriter + } + writerYNYWrapper struct{ writerYNY } +) + +func (w writerYNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// YYN + +type ( + writerYYN interface { + io.Writer + io.ByteWriter + RuneWriter + } + writerYYNWrapper struct{ writerYYN } +) + +func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NewAllWriter wraps an io.Writer turning it in to an AllWriter. If +// the io.Writer already has any of the other write methods, then its +// native version of those methods are used. 
+func NewAllWriter(inner io.Writer) AllWriter { + switch inner := inner.(type) { + // 3 Y bits + case AllWriter: // YYY: + return inner + // 2 Y bits + case writerNYY: + return writerNYYWrapper{writerNYY: inner} + case writerYNY: + return writerYNYWrapper{writerYNY: inner} + case writerYYN: + return writerYYNWrapper{writerYYN: inner} + // 1 Y bit + case writerNNY: + return writerNNYWrapper{writerNNY: inner} + case writerNYN: + return writerNYNWrapper{writerNYN: inner} + case writerYNN: + return writerYNNWrapper{writerYNN: inner} + // 0 Y bits + default: // NNN: + return writerNNNWrapper{writerNNN: inner} + } +} + +// discard ///////////////////////////////////////////////////////////////////// + +// Discard is like io.Discard, but implements AllWriter. +var Discard = discard{} + +type discard struct{} + +func (discard) Write(p []byte) (int, error) { return len(p), nil } +func (discard) WriteByte(b byte) error { return nil } +func (discard) WriteRune(r rune) (int, error) { return 0, nil } +func (discard) WriteString(s string) (int, error) { return len(s), nil } diff --git a/internal/base64.go b/internal/base64.go index 15adbf4..291a229 100644 --- a/internal/base64.go +++ b/internal/base64.go @@ -19,7 +19,10 @@ type base64Decoder struct { bufLen int } -func NewBase64Decoder(w io.Writer) io.WriteCloser { +func NewBase64Decoder(w io.Writer) interface { + io.WriteCloser + RuneWriter +} { return &base64Decoder{ dst: w, } @@ -112,6 +115,10 @@ func (dec *base64Decoder) Write(dat []byte) (int, error) { return len(dat), nil } +func (dec *base64Decoder) WriteRune(r rune) (int, error) { + return WriteRune(dec, r) +} + func (dec *base64Decoder) Close() error { if dec.bufLen == 0 { return nil diff --git a/ioutil.go b/ioutil.go deleted file mode 100644 index a53eac3..0000000 --- a/ioutil.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "io" - "unicode/utf8" -) - -func 
writeByte(w io.Writer, c byte) error { - if br, ok := w.(interface{ WriteByte(byte) error }); ok { - return br.WriteByte(c) - } - var buf [1]byte - buf[0] = c - if _, err := w.Write(buf[:]); err != nil { - return err - } - return nil -} - -func writeRune(w io.Writer, c rune) (int, error) { - if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { - return rw.WriteRune(c) - } - var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], c) - return w.Write(buf[:n]) -} diff --git a/reencode.go b/reencode.go index 876af62..393e8c6 100644 --- a/reencode.go +++ b/reencode.go @@ -71,7 +71,7 @@ type ReEncoderConfig struct { func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, - out: out, + out: internal.NewAllWriter(out), } } @@ -85,9 +85,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). type ReEncoder struct { ReEncoderConfig - out io.Writer + out internal.AllWriter - // state: .Write's utf8-decoding buffer + // state: .Write's and .WriteString's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int @@ -119,6 +119,11 @@ type speculation struct { // public API ////////////////////////////////////////////////////////////////// +var ( + _ internal.AllWriter = (*ReEncoder)(nil) + _ io.Closer = (*ReEncoder)(nil) +) + // Write implements io.Writer; it does what you'd expect. // // It is worth noting that Write returns the number of bytes consumed @@ -152,6 +157,38 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { return len(p), nil } +// WriteString implements io.StringWriter; it does what you'd expect, +// but see the notes on the Write method. 
+func (enc *ReEncoder) WriteString(p string) (int, error) { + if len(p) == 0 { + return 0, nil + } + var n int + if enc.bufLen > 0 { + copy(enc.buf[enc.bufLen:], p) + c, size := utf8.DecodeRune(enc.buf[:]) + n += size - enc.bufLen + enc.bufLen = 0 + if _, err := enc.WriteRune(c); err != nil { + return 0, err + } + } + for utf8.FullRuneInString(p[n:]) { + c, size := utf8.DecodeRuneInString(p[n:]) + if _, err := enc.WriteRune(c); err != nil { + return n, err + } + n += size + } + enc.bufLen = copy(enc.buf[:], p[n:]) + return len(p), nil +} + +// WriteByte implements io.ByteWriter; it does what you'd expect. +func (enc *ReEncoder) WriteByte(b byte) error { + return internal.WriteByte(enc, b) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -471,7 +508,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { } func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.out, c) + err := enc.out.WriteByte(c) if err == nil { enc.written++ } @@ -488,12 +525,12 @@ func (enc *ReEncoder) emitNlIndent() error { return err } if enc.Prefix != "" { - if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { return err } } for i := 0; i < enc.handleRuneState.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { return err } } -- cgit v1.2.3-2-g168b From d5b1b73eaaa060ef468f20d8b9eed029eb60ce45 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 27 Jan 2023 01:24:02 -0700 Subject: encode: Don't use panic for flow-control --- encode.go | 308 +++++++++++++++++++++++++++++++++---------------------- encode_string.go | 38 +++++-- 2 files changed, 216 insertions(+), 130 deletions(-) diff --git a/encode.go b/encode.go index c5a29b3..57f3852 100644 --- a/encode.go +++ b/encode.go 
@@ -32,22 +32,6 @@ type Encodable interface { EncodeJSON(w io.Writer) error } -type encodeError struct { - Err error -} - -func encodeWriteByte(w io.ByteWriter, b byte) { - if err := w.WriteByte(b); err != nil { - panic(encodeError{err}) - } -} - -func encodeWriteString(w io.StringWriter, str string) { - if _, err := w.WriteString(str); err != nil { - panic(encodeError{err}) - } -} - // An Encoder encodes and writes values to a stream of JSON elements. // // Encoder is analogous to, and has a similar API to the standar @@ -93,22 +77,19 @@ func NewEncoder(w io.Writer) *Encoder { // // [documentation for encoding/json.Marshal]: https://pkg.go.dev/encoding/json@go1.18#Marshal func (enc *Encoder) Encode(obj any) (err error) { - defer func() { - if r := recover(); r != nil { - if e, ok := r.(encodeError); ok { - err = e.Err - } else { - panic(r) - } - } - }() - encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}) + if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil { + return err + } if enc.closeAfterEncode { return enc.w.Close() } return nil } +func discardInt(_ int, err error) error { + return err +} + var ( encodableType = reflect.TypeOf((*Encodable)(nil)).Elem() jsonMarshalerType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() @@ -117,10 +98,9 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { +func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { if !val.IsValid() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } switch { @@ -129,29 +109,27 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q fallthrough case val.Type().Implements(encodableType): if val.Kind() == reflect.Pointer && 
val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(Encodable) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } // Use a sub-ReEncoder to check that it's a full element. validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) if err := obj.EncodeJSON(validator); err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, - }}) + } } if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, - }}) + } } case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType): @@ -159,37 +137,35 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q fallthrough case val.Type().Implements(jsonMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(json.Marshaler) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } dat, err := obj.MarshalJSON() if err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } // Use a sub-ReEncoder to check that it's a full element. 
validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) if _, err := validator.Write(dat); err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } if err := validator.Close(); err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType): @@ -197,61 +173,86 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q fallthrough case val.Type().Implements(textMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(encoding.TextMarshaler) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } text, err := obj.MarshalText() if err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalText", Err: err, - }}) + } + } + if err := encodeStringFromBytes(w, escaper, text); err != nil { + return err } - encodeStringFromBytes(w, escaper, text) - default: switch val.Kind() { case reflect.Bool: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } if val.Bool() { - encodeWriteString(w, "true") + if _, err := w.WriteString("true"); err != nil { + return err + } } else { - encodeWriteString(w, "false") + if _, err := w.WriteString("false"); err != nil { + return err + } } if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if _, err := 
w.WriteString(strconv.FormatInt(val.Int(), 10)); err != nil { + return err } - encodeWriteString(w, strconv.FormatInt(val.Int(), 10)) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if _, err := w.WriteString(strconv.FormatUint(val.Uint(), 10)); err != nil { + return err } - encodeWriteString(w, strconv.FormatUint(val.Uint(), 10)) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Float32, reflect.Float64: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if err := encodeTODO(w, val); err != nil { + return err } - encodeTODO(w, val) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.String: if val.Type() == numberType { @@ -260,29 +261,47 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q numStr = "0" } if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if _, err := w.WriteString(numStr); err != nil { + return err } - encodeWriteString(w, numStr) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } } else { if quote { var buf bytes.Buffer - encodeStringFromString(&buf, escaper, val.String()) - encodeStringFromBytes(w, escaper, buf.Bytes()) + if err := encodeStringFromString(&buf, escaper, val.String()); err != nil { + return err + } + if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { + return err + } } else { - encodeStringFromString(w, escaper, val.String()) + if err := encodeStringFromString(w, escaper, val.String()); err != nil { + return err + } } } case reflect.Interface: if val.IsNil() { - 
encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } } else { - encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen) + if err := encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen); err != nil { + return err + } } case reflect.Struct: - encodeWriteByte(w, '{') + if err := w.WriteByte('{'); err != nil { + return err + } empty := true for _, field := range indexStruct(val.Type()).byPos { fVal, err := val.FieldByIndexErr(field.Path) @@ -293,35 +312,45 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q continue } if !empty { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } } empty = false - encodeStringFromString(w, escaper, field.Name) - encodeWriteByte(w, ':') - encode(w, fVal, escaper, field.Quote, cycleDepth, cycleSeen) + if err := encodeStringFromString(w, escaper, field.Name); err != nil { + return err + } + if err := w.WriteByte(':'); err != nil { + return err + } + if err := encode(w, fVal, escaper, field.Quote, cycleDepth, cycleSeen); err != nil { + return err + } + } + if err := w.WriteByte('}'); err != nil { + return err } - encodeWriteByte(w, '}') case reflect.Map: if val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } if val.Len() == 0 { - encodeWriteString(w, "{}") - return + return discardInt(w.WriteString("{}")) } if cycleDepth++; cycleDepth > startDetectingCyclesAfter { ptr := val.UnsafePointer() if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encodeWriteByte(w, '{') + if err := w.WriteByte('{'); err != nil { + return err + } type kv struct { K string @@ -332,14 +361,18 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q for i := 0; iter.Next(); i++ 
{ // TODO: Avoid buffering the map key var k strings.Builder - encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen) + if err := encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { + return err + } kStr := k.String() if kStr == "null" { kStr = `""` } if !strings.HasPrefix(kStr, `"`) { k.Reset() - encodeStringFromString(&k, escaper, kStr) + if err := encodeStringFromString(&k, escaper, kStr); err != nil { + return err + } kStr = k.String() } kvs[i].K = kStr @@ -351,17 +384,29 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q for i, kv := range kvs { if i > 0 { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } + } + if _, err := w.WriteString(kv.K); err != nil { + return err + } + if err := w.WriteByte(':'); err != nil { + return err + } + if err := encode(w, kv.V, escaper, false, cycleDepth, cycleSeen); err != nil { + return err } - encodeWriteString(w, kv.K) - encodeWriteByte(w, ':') - encode(w, kv.V, escaper, false, cycleDepth, cycleSeen) } - encodeWriteByte(w, '}') + if err := w.WriteByte('}'); err != nil { + return err + } case reflect.Slice: switch { case val.IsNil(): - encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } case val.Type().Elem().Kind() == reflect.Uint8 && !(false || val.Type().Elem().Implements(encodableType) || reflect.PointerTo(val.Type().Elem()).Implements(encodableType) || @@ -369,11 +414,13 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q reflect.PointerTo(val.Type().Elem()).Implements(jsonMarshalerType) || val.Type().Elem().Implements(textMarshalerType) || reflect.PointerTo(val.Type().Elem()).Implements(textMarshalerType)): - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } enc := base64.NewEncoder(base64.StdEncoding, w) if val.CanConvert(byteSliceType) { if _, err := enc.Write(val.Convert(byteSliceType).Interface().([]byte)); 
err != nil { - panic(encodeError{err}) + return err } } else { // TODO: Surely there's a better way. @@ -381,14 +428,16 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q var buf [1]byte buf[0] = val.Index(i).Convert(byteType).Interface().(byte) if _, err := enc.Write(buf[:]); err != nil { - panic(encodeError{err}) + return err } } } if err := enc.Close(); err != nil { - panic(encodeError{err}) + return err + } + if err := w.WriteByte('"'); err != nil { + return err } - encodeWriteByte(w, '"') default: if cycleDepth++; cycleDepth > startDetectingCyclesAfter { // For slices, val.UnsafePointer() doesn't return a pointer to the slice header @@ -401,61 +450,80 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q len int }{val.UnsafePointer(), val.Len()} if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encodeArray(w, val, escaper, cycleDepth, cycleSeen) + if err := encodeArray(w, val, escaper, cycleDepth, cycleSeen); err != nil { + return err + } } case reflect.Array: - encodeArray(w, val, escaper, cycleDepth, cycleSeen) + if err := encodeArray(w, val, escaper, cycleDepth, cycleSeen); err != nil { + return err + } case reflect.Pointer: if val.IsNil() { - encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } } else { if cycleDepth++; cycleDepth > startDetectingCyclesAfter { ptr := val.UnsafePointer() if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen) + if err := encode(w, val.Elem(), escaper, quote, cycleDepth, 
cycleSeen); err != nil { + return err + } } default: - panic(encodeError{&EncodeTypeError{ + return &EncodeTypeError{ Type: val.Type(), - }}) + } } } + return nil } -func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { - encodeWriteByte(w, '[') +func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { + if err := w.WriteByte('['); err != nil { + return err + } n := val.Len() for i := 0; i < n; i++ { if i > 0 { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } } - encode(w, val.Index(i), escaper, false, cycleDepth, cycleSeen) + if err := encode(w, val.Index(i), escaper, false, cycleDepth, cycleSeen); err != nil { + return err + } + } + if err := w.WriteByte(']'); err != nil { + return err } - encodeWriteByte(w, ']') + return nil } -func encodeTODO(w io.Writer, val reflect.Value) { +func encodeTODO(w io.Writer, val reflect.Value) error { bs, err := json.Marshal(val.Interface()) if err != nil { - panic(encodeError{err}) + return err } if _, err := w.Write(bs); err != nil { - panic(encodeError{err}) + return err } + return nil } diff --git a/encode_string.go b/encode_string.go index 831a038..12f934e 100644 --- a/encode_string.go +++ b/encode_string.go @@ -83,29 +83,47 @@ func writeStringChar(w internal.AllWriter, c rune, wasEscaped BackslashEscapeMod } } -func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str string) { - encodeWriteByte(w, '"') +func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } for _, c := range str { if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) + return err } } - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + return nil } -func 
encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str []byte) { - encodeWriteByte(w, '"') +func encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) + return err } i += size } - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + return nil } func init() { - internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(internal.NewAllWriter(w), nil, s) } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(internal.NewAllWriter(w), nil, s) } + internal.EncodeStringFromString = func(w io.Writer, s string) { + if err := encodeStringFromString(internal.NewAllWriter(w), nil, s); err != nil { + panic(err) + } + } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { + if err := encodeStringFromBytes(internal.NewAllWriter(w), nil, s); err != nil { + panic(err) + } + } } -- cgit v1.2.3-2-g168b From 19f9c9c972c5cfc64de08ba581cc24d96426e73c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 27 Jan 2023 13:44:43 -0700 Subject: reencode: Rethink CompactIfUnder to have linear memory --- internal/parse.go | 4 +++ reencode.go | 79 +++++++++++++++++++++++++++++++------------------------ 2 files changed, 49 insertions(+), 34 deletions(-) diff --git a/internal/parse.go b/internal/parse.go index b11aae6..9db57fb 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -349,6 +349,10 @@ func (par *Parser) StackIsEmpty() bool { return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny) } +func (par *Parser) StackSize() int { + return len(par.stack) +} + // Reset all Parser state. 
func (par *Parser) Reset() { *par = Parser{ diff --git a/reencode.go b/reencode.go index 393e8c6..b3f4d20 100644 --- a/reencode.go +++ b/reencode.go @@ -35,8 +35,8 @@ type ReEncoderConfig struct { // // Has no affect if Compact is true or Indent is empty. // - // This has O((CompactIfUnder+1)^2) memory overhead, so set - // with caution. + // This has O(2^min(CompactIfUnder, depth)) time overhead, so + // set with caution. CompactIfUnder int // String to use to indent; ignored if Compact is true. @@ -82,7 +82,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. // -// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). +// The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { ReEncoderConfig out internal.AllWriter @@ -111,10 +111,16 @@ type ReEncoder struct { } type speculation struct { - compactFmt ReEncoder - compactBuf bytes.Buffer - indentFmt ReEncoder - indentBuf bytes.Buffer + endWhenStackSize int + fmt ReEncoder + compact bytes.Buffer + buf []inputTuple +} + +type inputTuple struct { + c rune + t internal.RuneType + stackSize int } // public API ////////////////////////////////////////////////////////////////// @@ -207,7 +213,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if err := enc.handleRune(0, internal.RuneTypeError); err != nil { + if err := enc.handleRune(0, internal.RuneTypeError, enc.par.StackSize()); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, @@ -249,7 +255,7 @@ rehandle: } return enc.written, enc.err } - enc.err = enc.handleRune(c, t) + enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { if enc.AllowMultipleValues { enc.par.Reset() @@ -269,7 +275,7 @@ rehandle: // internal //////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) handleRune(c rune, t internal.RuneType)
error { +func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -282,17 +288,20 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { return err } specu := &speculation{ - compactFmt: *enc, - indentFmt: *enc, + endWhenStackSize: stackSize - 1, + fmt: ReEncoder{ + ReEncoderConfig: enc.ReEncoderConfig, + }, } - specu.compactFmt.Compact = true - specu.compactFmt.out = &specu.compactBuf - specu.indentFmt.out = &specu.indentBuf + specu.fmt.Compact = true + specu.fmt.out = &specu.compact enc.handleRuneState.specu = specu - if err := specu.compactFmt.handleRuneMain(c, t); err != nil { - return err - } - if err := specu.indentFmt.handleRuneMain(c, t); err != nil { + enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := specu.fmt.handleRuneMain(c, t); err != nil { return err } default: @@ -301,26 +310,28 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { } } } else { // speculating - - // canCompress is whether we're 1-up from the leaf; - // set this *before* the calls to .handleRune. 
- canCompress := enc.handleRuneState.specu.indentFmt.handleRuneState.specu == nil - - if err := enc.handleRuneState.specu.compactFmt.handleRune(c, t); err != nil { + enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := enc.handleRuneState.specu.fmt.handleRune(c, t, stackSize); err != nil { return err } - if err := enc.handleRuneState.specu.indentFmt.handleRune(c, t); err != nil { - return err - } - switch { - case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent - if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil { + case enc.handleRuneState.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + buf := enc.handleRuneState.specu.buf + enc.handleRuneState.specu = nil + if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } - enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState - case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil { + for _, tuple := range buf[1:] { + if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + return err + } + } + case stackSize == enc.handleRuneState.specu.endWhenStackSize: // stop speculating; use compact + if _, err := enc.handleRuneState.specu.compact.WriteTo(enc.out); err != nil { return err } enc.handleRuneState.lastNonSpace = t -- cgit v1.2.3-2-g168b From bf28b48d23d19990190d5e5aeaee14ea6652a293 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 10:05:06 -0700 Subject: reencode: Wrapping the handleRune state is no longer necessary --- reencode.go | 101 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/reencode.go b/reencode.go index b3f4d20..d588b1b 
100644 --- a/reencode.go +++ b/reencode.go @@ -98,16 +98,13 @@ type ReEncoder struct { inputPos int64 // state: .handleRune - handleRuneState struct { - lastNonSpace internal.RuneType - wasNumber bool - curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings - fracZeros int64 - expZero bool - - specu *speculation - } + lastNonSpace internal.RuneType + wasNumber bool + curIndent int + uhex [4]byte // "\uABCD"-encoded characters in strings + fracZeros int64 + expZero bool + specu *speculation } type speculation struct { @@ -281,7 +278,7 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } // main - if enc.handleRuneState.specu == nil { // not speculating + if enc.specu == nil { // not speculating switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { @@ -295,8 +292,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } specu.fmt.Compact = true specu.fmt.out = &specu.compact - enc.handleRuneState.specu = specu - enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + enc.specu = specu + enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, @@ -310,18 +307,18 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } } } else { // speculating - enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, }) - if err := enc.handleRuneState.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { return err } switch { - case enc.handleRuneState.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent - buf := enc.handleRuneState.specu.buf - enc.handleRuneState.specu = nil + case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop 
speculating; use indent + buf := enc.specu.buf + enc.specu = nil if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } @@ -330,13 +327,13 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err return err } } - case stackSize == enc.handleRuneState.specu.endWhenStackSize: // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compact.WriteTo(enc.out); err != nil { + case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact + if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { return err } - enc.handleRuneState.lastNonSpace = t - enc.handleRuneState.curIndent-- - enc.handleRuneState.specu = nil + enc.lastNonSpace = t + enc.curIndent-- + enc.specu = nil } } @@ -358,9 +355,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error // the new rune itself is handled. func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // emit newlines between top-level values - if enc.handleRuneState.lastNonSpace == internal.RuneTypeEOF { + if enc.lastNonSpace == internal.RuneTypeEOF { switch { - case enc.handleRuneState.wasNumber && t.IsNumber(): + case enc.wasNumber && t.IsNumber(): if err := enc.emitByte('\n'); err != nil { return err, false } @@ -374,35 +371,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits case internal.RuneTypeNumberFracDot: - enc.handleRuneState.fracZeros = 0 + enc.fracZeros = 0 case internal.RuneTypeNumberFracDig: - if c == '0' && enc.handleRuneState.lastNonSpace == internal.RuneTypeNumberFracDig { - enc.handleRuneState.fracZeros++ + if c == '0' && enc.lastNonSpace == internal.RuneTypeNumberFracDig { + enc.fracZeros++ return nil, false } fallthrough default: - for enc.handleRuneState.fracZeros > 0 { + for enc.fracZeros > 0 { if err := enc.emitByte('0'); err != nil { 
return err, false } - enc.handleRuneState.fracZeros-- + enc.fracZeros-- } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign: - enc.handleRuneState.expZero = true + enc.expZero = true case internal.RuneTypeNumberExpDig: - if c == '0' && enc.handleRuneState.expZero { + if c == '0' && enc.expZero { return nil, false } - enc.handleRuneState.expZero = false + enc.expZero = false default: - if enc.handleRuneState.expZero { + if enc.expZero { if err := enc.emitByte('0'); err != nil { return err, false } - enc.handleRuneState.expZero = false + enc.expZero = false } } @@ -418,8 +415,8 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // let us manage whitespace, don't pass it through return nil, false case internal.RuneTypeObjectEnd, internal.RuneTypeArrayEnd: - enc.handleRuneState.curIndent-- - switch enc.handleRuneState.lastNonSpace { + enc.curIndent-- + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // collapse default: @@ -428,7 +425,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } } default: - switch enc.handleRuneState.lastNonSpace { + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err, false @@ -440,7 +437,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: - enc.handleRuneState.curIndent++ + enc.curIndent++ } } } @@ -452,7 +449,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { defer func() { if t != internal.RuneTypeSpace { - enc.handleRuneState.lastNonSpace = t + enc.lastNonSpace = t } }() @@ 
-484,33 +481,33 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { panic("should not happen") } case internal.RuneTypeStringEscUA: - enc.handleRuneState.uhex[0], _ = internal.HexToInt(c) + enc.uhex[0], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUB: - enc.handleRuneState.uhex[1], _ = internal.HexToInt(c) + enc.uhex[1], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUC: - enc.handleRuneState.uhex[2], _ = internal.HexToInt(c) + enc.uhex[2], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUD: - enc.handleRuneState.uhex[3], _ = internal.HexToInt(c) + enc.uhex[3], _ = internal.HexToInt(c) c := 0 | - rune(enc.handleRuneState.uhex[0])<<12 | - rune(enc.handleRuneState.uhex[1])<<8 | - rune(enc.handleRuneState.uhex[2])<<4 | - rune(enc.handleRuneState.uhex[3])<<0 + rune(enc.uhex[0])<<12 | + rune(enc.uhex[1])<<8 | + rune(enc.uhex[2])<<4 | + rune(enc.uhex[3])<<0 return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value - enc.handleRuneState.wasNumber = enc.handleRuneState.lastNonSpace.IsNumber() + enc.wasNumber = enc.lastNonSpace.IsNumber() switch { case enc.ForceTrailingNewlines: - t = internal.RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one) + t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) return enc.emitByte('\n') default: - t = internal.RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed + t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed return nil } default: @@ -540,7 +537,7 @@ func (enc *ReEncoder) emitNlIndent() error { return err } } - for i := 0; i < enc.handleRuneState.curIndent; i++ { + for i := 0; i < enc.curIndent; i++ { if err := 
enc.emit(enc.out.WriteString(enc.Indent)); err != nil { return err } -- cgit v1.2.3-2-g168b From 284be3f68b1eaf2ba693e0a8ae03baa80ebc973f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 10:43:23 -0700 Subject: reencode: Reuse speculation buffers --- encode.go | 4 ++-- reencode.go | 34 +++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/encode.go b/encode.go index 57f3852..949fd55 100644 --- a/encode.go +++ b/encode.go @@ -116,7 +116,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q return discardInt(w.WriteString("null")) } // Use a sub-ReEncoder to check that it's a full element. - validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) + validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: escaper}} if err := obj.EncodeJSON(validator); err != nil { return &EncodeMethodError{ Type: val.Type(), @@ -152,7 +152,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q } } // Use a sub-ReEncoder to check that it's a full element. 
- validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) + validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: escaper}} if _, err := validator.Write(dat); err != nil { return &EncodeMethodError{ Type: val.Type(), diff --git a/reencode.go b/reencode.go index d588b1b..49d8ddb 100644 --- a/reencode.go +++ b/reencode.go @@ -72,6 +72,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, out: internal.NewAllWriter(out), + specu: new(speculation), } } @@ -108,12 +109,21 @@ type ReEncoder struct { } type speculation struct { + speculating bool endWhenStackSize int fmt ReEncoder compact bytes.Buffer buf []inputTuple } +func (specu *speculation) Reset() { + specu.speculating = false + specu.endWhenStackSize = 0 + specu.fmt = ReEncoder{} + specu.compact.Reset() + specu.buf = specu.buf[:0] +} + type inputTuple struct { c rune t internal.RuneType @@ -278,27 +288,25 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } // main - if enc.specu == nil { // not speculating + if !enc.specu.speculating { // not speculating switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { return err } - specu := &speculation{ - endWhenStackSize: stackSize - 1, - fmt: ReEncoder{ - ReEncoderConfig: enc.ReEncoderConfig, - }, + enc.specu.speculating = true + enc.specu.endWhenStackSize = stackSize - 1 + enc.specu.fmt = ReEncoder{ + ReEncoderConfig: enc.ReEncoderConfig, + out: &enc.specu.compact, } - specu.fmt.Compact = true - specu.fmt.out = &specu.compact - enc.specu = specu + enc.specu.fmt.Compact = true enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, }) - if err := specu.fmt.handleRuneMain(c, t); err != nil { + if err := enc.specu.fmt.handleRuneMain(c, t); err != nil { return err } default: @@ -317,8 +325,8 @@ func (enc *ReEncoder) 
handleRune(c rune, t internal.RuneType, stackSize int) err } switch { case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent - buf := enc.specu.buf - enc.specu = nil + buf := append([]inputTuple(nil), enc.specu.buf...) + enc.specu.Reset() if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } @@ -331,9 +339,9 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { return err } + enc.specu.Reset() enc.lastNonSpace = t enc.curIndent-- - enc.specu = nil } } -- cgit v1.2.3-2-g168b From 1a79116fd80a2c290ffd498686ff3b6d4dd3745b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 22:51:28 -0700 Subject: encode: Nested ReEncoders don't need to inherit the backslash escaper --- encode.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/encode.go b/encode.go index 949fd55..5fb4fbf 100644 --- a/encode.go +++ b/encode.go @@ -116,7 +116,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q return discardInt(w.WriteString("null")) } // Use a sub-ReEncoder to check that it's a full element. - validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: escaper}} + validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} if err := obj.EncodeJSON(validator); err != nil { return &EncodeMethodError{ Type: val.Type(), @@ -152,7 +152,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q } } // Use a sub-ReEncoder to check that it's a full element. 
- validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: escaper}} + validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} if _, err := validator.Write(dat); err != nil { return &EncodeMethodError{ Type: val.Type(), -- cgit v1.2.3-2-g168b From 659114fee7c39f06c408135169f2848a881dfe5e Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 22:43:32 -0700 Subject: reencode: I see handleRuneMain's defer handling showing up in the CPU profile --- reencode.go | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/reencode.go b/reencode.go index 49d8ddb..a5dc3c8 100644 --- a/reencode.go +++ b/reencode.go @@ -455,48 +455,40 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { - defer func() { - if t != internal.RuneTypeSpace { - enc.lastNonSpace = t - } - }() - + var err error switch t { case internal.RuneTypeStringChar: - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: - return nil + // do nothing case internal.RuneTypeStringEsc1: switch c { case '"': - return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': - return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': - return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '/', 
BackslashEscapeShort, enc.BackslashEscape)) case 'b': - return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': - return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': - return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': - return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': - return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } case internal.RuneTypeStringEscUA: enc.uhex[0], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUB: enc.uhex[1], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUC: enc.uhex[2], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUD: enc.uhex[3], _ = internal.HexToInt(c) c := 0 | @@ -504,7 +496,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { rune(enc.uhex[1])<<8 | rune(enc.uhex[2])<<4 | rune(enc.uhex[3])<<0 - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -513,14 +505,18 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { switch { case enc.ForceTrailingNewlines: t = internal.RuneTypeError // 
enc.lastNonSpace : an NL isn't needed (we already printed one) - return enc.emitByte('\n') + err = enc.emitByte('\n') default: t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed - return nil } default: - return enc.emitByte(byte(c)) + err = enc.emitByte(byte(c)) } + + if t != internal.RuneTypeSpace { + enc.lastNonSpace = t + } + return err } func (enc *ReEncoder) emitByte(c byte) error { -- cgit v1.2.3-2-g168b From bc1bacc410ddfa444c5bf0e56f33a7da440658ae Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 11:42:51 -0700 Subject: decode: Inline noWsRuneTypeScanner into runeTypeScannerImpl --- decode.go | 10 ++--- decode_scan.go | 37 ++--------------- decode_scan_test.go | 113 ++++++++++------------------------------------------ 3 files changed, 30 insertions(+), 130 deletions(-) diff --git a/decode.go b/decode.go index 91be865..8fab267 100644 --- a/decode.go +++ b/decode.go @@ -104,12 +104,10 @@ const maxNestingDepth = 10000 // an io.Reader. func NewDecoder(r io.RuneScanner) *Decoder { return &Decoder{ - io: &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: r, - parser: internal.Parser{ - MaxDepth: maxNestingDepth, - }, + io: &runeTypeScannerImpl{ + inner: r, + parser: internal.Parser{ + MaxDepth: maxNestingDepth, }, }, } diff --git a/decode_scan.go b/decode_scan.go index 387fcea..261aaa6 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -77,6 +77,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err _, _, _ = sc.inner.ReadRune() default: sc.initialized = true + again: var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() sc.offset += int64(sc.rSize) @@ -91,6 +92,9 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err } else { sc.rErr = nil } + if sc.rType == internal.RuneTypeSpace { + goto again + } case io.EOF: sc.rType, err = sc.parser.HandleEOF() if err != nil { @@ -147,39 +151,6 @@ func (sc *runeTypeScannerImpl) InputOffset() int64 { return ret 
} -// noWSRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// - -type noWSRuneTypeScanner struct { - inner runeTypeScanner -} - -var _ runeTypeScanner = (*noWSRuneTypeScanner)(nil) - -func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { -again: - r, s, t, e := sc.inner.ReadRuneType() - if t == internal.RuneTypeSpace { - goto again - } - return r, s, t, e -} - -func (sc *noWSRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - -func (sc *noWSRuneTypeScanner) UnreadRune() error { return sc.inner.UnreadRune() } -func (sc *noWSRuneTypeScanner) Reset() { sc.inner.Reset() } -func (sc *noWSRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } - // elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// type elemRuneTypeScanner struct { diff --git a/decode_scan_test.go b/decode_scan_test.go index 6a430ab..5bf5e2a 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -69,7 +69,6 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -86,7 +85,6 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, -1, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, @@ -105,7 +103,6 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, 
internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, -1, 0, nil}, {0, -1, 0, ErrInvalidUnreadRune}, @@ -125,7 +122,6 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -136,44 +132,7 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'0', 1, internal.RuneTypeNumberIntZero, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - }}, - "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - }}, - "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, 
&DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - }}, - "empty": {``, ``, []ReadRuneTypeResult{ - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - }}, - } - testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &runeTypeScannerImpl{ - inner: reader, - } - }) -} - -func TestNoWSRuneTypeScanner(t *testing.T) { - t.Parallel() - testcases := map[string]runeTypeScannerTestcase{ - "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ + "tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ {'{', 1, internal.RuneTypeObjectBeg, nil}, {'"', 1, internal.RuneTypeStringBeg, nil}, {'f', 1, internal.RuneTypeStringChar, nil}, @@ -189,39 +148,14 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - }}, - "tail": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, 
internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'0', 1, internal.RuneTypeNumberIntZero, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, @@ -235,12 +169,15 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, }}, + "empty": {``, ``, []ReadRuneTypeResult{ + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + return &runeTypeScannerImpl{ + inner: reader, } }) } @@ -301,10 +238,8 @@ func 
TestElemRuneTypeScanner(t *testing.T) { t.Parallel() testRuneTypeScanner(t, toplevelTestcases, func(reader io.RuneScanner) runeTypeScanner { return &elemRuneTypeScanner{ - inner: &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + inner: &runeTypeScannerImpl{ + inner: reader, }, } }) @@ -312,10 +247,8 @@ func TestElemRuneTypeScanner(t *testing.T) { t.Run("child", func(t *testing.T) { t.Parallel() testRuneTypeScanner(t, childTestcases, func(reader io.RuneScanner) runeTypeScanner { - inner := &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + inner := &runeTypeScannerImpl{ + inner: reader, } var res ReadRuneTypeResult res.r, res.s, res.t, res.e = inner.ReadRuneType() @@ -332,10 +265,8 @@ func TestElemRuneTypeScanner(t *testing.T) { func TestElemRuneTypeScanner2(t *testing.T) { t.Parallel() - parent := &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: strings.NewReader(` { "foo" : 12.0 } `), - }, + parent := &runeTypeScannerImpl{ + inner: strings.NewReader(` { "foo" : 12.0 } `), } exp := []ReadRuneTypeResult{ {'{', 1, internal.RuneTypeObjectBeg, nil}, -- cgit v1.2.3-2-g168b From b3f4186f2b8e992f56f898784b1cd28bfd7550ca Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 20:59:37 -0700 Subject: Invent "barriers" instead of nesting parsers --- decode.go | 56 ++++++----- decode_scan.go | 143 ++++++++-------------------- decode_scan_test.go | 263 +++++++++++++++++++++------------------------------- encode.go | 26 +++--- internal/parse.go | 120 +++++++++++++++++++++++- reencode.go | 23 ++++- 6 files changed, 324 insertions(+), 307 deletions(-) diff --git a/decode.go b/decode.go index 8fab267..60b530f 100644 --- a/decode.go +++ b/decode.go @@ -104,7 +104,7 @@ const maxNestingDepth = 10000 // an io.Reader. 
func NewDecoder(r io.RuneScanner) *Decoder { return &Decoder{ - io: &runeTypeScannerImpl{ + io: runeTypeScanner{ inner: r, parser: internal.Parser{ MaxDepth: maxNestingDepth, @@ -245,6 +245,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } dec.io.Reset() + dec.io.PushReadBarrier() defer func() { if r := recover(); r != nil { if de, ok := r.(decodeError); ok { @@ -257,6 +258,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } }() dec.decode(ptrVal.Elem(), false) + dec.io.PopReadBarrier() return nil } @@ -319,12 +321,21 @@ func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Typ } } -type decRuneTypeScanner struct { +type decRuneScanner struct { dec *Decoder + eof bool } -func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { +func (sc *decRuneScanner) ReadRune() (rune, int, error) { + if sc.eof { + return 0, 0, io.EOF + } c, s, t, e := sc.dec.io.ReadRuneType() + if t == internal.RuneTypeEOF { + sc.eof = true + sc.dec.io.PopReadBarrier() + return 0, 0, io.EOF + } if e != nil { panic(decodeError{ Field: sc.dec.structStackStr(), @@ -333,28 +344,17 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, erro Err: e, }) } - return c, s, t, nil + return c, s, nil } -func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, _ := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - default: - return r, s, nil - } +func (sc *decRuneScanner) UnreadRune() error { + return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) UnreadRune() error { return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) InputOffset() int64 { return sc.dec.InputOffset() } -func (sc *decRuneTypeScanner) Reset() { sc.dec.io.Reset() } - -func (dec *Decoder) limitingScanner() runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &decRuneTypeScanner{ - dec: dec, - }, +func (dec *Decoder) limitingScanner() io.RuneScanner { + dec.io.PushReadBarrier() + return 
&decRuneScanner{ + dec: dec, } } @@ -867,7 +867,12 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeObject(nil, @@ -947,7 +952,12 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeArray(nil, func() { diff --git a/decode_scan.go b/decode_scan.go index 261aaa6..e233caf 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -5,31 +5,12 @@ package lowmemjson import ( - "errors" "io" "git.lukeshu.com/go/lowmemjson/internal" ) -type runeTypeScanner interface { - // The returned error is a *ReadError, a *SyntaxError, or nil. - // An EOF condition is represented as one of: - // - // end of value but not file: (_, >0, RuneTypeEOF, nil) - // end of both value and file: (_, 0, RuneTypeEOF, nil) - // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) - // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) - ReadRuneType() (rune, int, internal.RuneType, error) - // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. - ReadRune() (rune, int, error) - UnreadRune() error - Reset() - InputOffset() int64 -} - -// runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// - -type runeTypeScannerImpl struct { +type runeTypeScanner struct { // everything that is not "initialized by constructor" starts // out as the zero value. 
@@ -47,9 +28,7 @@ type runeTypeScannerImpl struct { rErr error } -var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) - -func (sc *runeTypeScannerImpl) Reset() { +func (sc *runeTypeScanner) Reset() { sc.parser.Reset() if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { sc.repeat = false @@ -69,7 +48,14 @@ func (sc *runeTypeScannerImpl) Reset() { } } -func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { +// The returned error is a *ReadError, a *SyntaxError, or nil. +// An EOF condition is represented as one of: +// +// end of value but not file: (_, >0, RuneTypeEOF, nil) +// end of both value and file: (_, 0, RuneTypeEOF, nil) +// end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) +// end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) +func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { switch { case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): // do nothing @@ -117,24 +103,12 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err return sc.rRune, sc.rSize, sc.rType, sc.rErr } -func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - // UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). // // If the last call to .ReadRune() or .ReadRuneType() has already been // unread, or if that call returned a rune with size 0, then // ErrInvalidUnreadRune is returned. Otherwise, nil is returned. 
-func (sc *runeTypeScannerImpl) UnreadRune() error { +func (sc *runeTypeScanner) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } @@ -143,7 +117,7 @@ func (sc *runeTypeScannerImpl) UnreadRune() error { return nil } -func (sc *runeTypeScannerImpl) InputOffset() int64 { +func (sc *runeTypeScanner) InputOffset() int64 { ret := sc.offset if sc.repeat { ret -= int64(sc.rSize) @@ -151,76 +125,37 @@ func (sc *runeTypeScannerImpl) InputOffset() int64 { return ret } -// elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// - -type elemRuneTypeScanner struct { - inner runeTypeScanner - - parser internal.Parser - repeat bool - stuck bool - rType internal.RuneType - rErr error +func (sc *runeTypeScanner) PushReadBarrier() { + sc.parser.PushReadBarrier() } -var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) - -func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { - // Read it, run it through the parent's parser. - r, s, t, e := sc.inner.ReadRuneType() - - // Run it through our child parser. 
- if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) { - if sc.repeat || sc.stuck { - sc.repeat = false - } else { - var err error - if s > 0 { - sc.rType, err = sc.parser.HandleRune(r) - } else { - sc.rType, err = sc.parser.HandleEOF() - } - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.inner.InputOffset(), - Err: err, - } - } else { - sc.rErr = nil +func (sc *runeTypeScanner) PopReadBarrier() { + sc.parser.PopBarrier() + if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, } + } else { + sc.rErr = nil } - sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError - t, e = sc.rType, sc.rErr - } - - // Check if we need to truncate the result. - if t == internal.RuneTypeEOF { - if s > 0 { - _ = sc.inner.UnreadRune() + // tell it to use that rType and rErr + _ = sc.UnreadRune() // we set it up to always succeed + } else if sc.rType == internal.RuneTypeEOF { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleEOF() + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset, + Err: err, + } + } else { + sc.rErr = nil } - return 0, 0, internal.RuneTypeEOF, nil } - - return r, s, t, e } - -func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - -func (sc *elemRuneTypeScanner) UnreadRune() error { - ret := sc.inner.UnreadRune() - sc.repeat = true - return ret -} - -func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } -func (sc *elemRuneTypeScanner) Reset() {} diff --git a/decode_scan_test.go b/decode_scan_test.go index 5bf5e2a..d0725e5 100644 --- 
a/decode_scan_test.go +++ b/decode_scan_test.go @@ -11,7 +11,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "git.lukeshu.com/go/lowmemjson/internal" ) @@ -23,8 +22,26 @@ type ReadRuneTypeResult struct { e error } +const ( + unreadRune = -1 + pushReadBarrier = -2 + popReadBarrier = -3 + reset = -4 +) + func (r ReadRuneTypeResult) String() string { - return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + switch r.s { + case unreadRune: + return fmt.Sprintf("{%q, unreadRune, %#v, %v}", r.r, r.t, r.e) + case pushReadBarrier: + return fmt.Sprintf("{%q, pushReadBarrier, %#v, %v}", r.r, r.t, r.e) + case popReadBarrier: + return fmt.Sprintf("{%q, popReadBarrier, %#v, %v}", r.r, r.t, r.e) + case reset: + return fmt.Sprintf("{%q, reset, %#v, %v}", r.r, r.t, r.e) + default: + return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + } } type runeTypeScannerTestcase struct { @@ -33,31 +50,6 @@ type runeTypeScannerTestcase struct { Exp []ReadRuneTypeResult } -func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneScanner) runeTypeScanner) { - for tcName, tc := range testcases { - tc := tc - t.Run(tcName, func(t *testing.T) { - t.Parallel() - reader := strings.NewReader(tc.Input) - sc := factory(reader) - var exp, act []string - for _, iExp := range tc.Exp { - var iAct ReadRuneTypeResult - if iExp.s < 0 { - iAct.s = iExp.s - iAct.e = sc.UnreadRune() - } else { - iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() - } - exp = append(exp, iExp.String()) - act = append(act, iAct.String()) - } - assert.Equal(t, exp, act) - assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) - }) - } -} - func TestRuneTypeScanner(t *testing.T) { t.Parallel() testcases := map[string]runeTypeScannerTestcase{ @@ -86,7 +78,7 @@ func TestRuneTypeScanner(t *testing.T) { {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, {'1', 1, 
internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, + {0, unreadRune, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -104,8 +96,8 @@ func TestRuneTypeScanner(t *testing.T) { {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, nil}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -128,7 +120,7 @@ func TestRuneTypeScanner(t *testing.T) { {'0', 1, internal.RuneTypeNumberFracDig, nil}, {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, @@ -174,150 +166,103 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, }}, - } - testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &runeTypeScannerImpl{ - inner: reader, - } - }) -} - -func TestElemRuneTypeScanner(t *testing.T) { - t.Parallel() - toplevelTestcases := map[string]runeTypeScannerTestcase{ - "basic": {`1`, ``, []ReadRuneTypeResult{ + "basic2": {`1`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'0', 1, internal.RuneTypeNumberIntZero, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, 
internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - }}, - "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{ + "fragment": {`1,`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, }}, - "fragment": {`1,`, `,`, []ReadRuneTypeResult{ + "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {0, pushReadBarrier, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - 
{0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - }}, } - - childTestcases := make(map[string]runeTypeScannerTestcase, len(toplevelTestcases)) - for tcName, tc := range toplevelTestcases { - tc.Input = `[` + tc.Input - tc.Exp = append([]ReadRuneTypeResult(nil), tc.Exp...) // copy - for i, res := range tc.Exp { - if se, ok := res.e.(*DecodeSyntaxError); ok { - seCopy := *se - seCopy.Offset++ - tc.Exp[i].e = &seCopy + func() { + childTestcases := make(map[string]runeTypeScannerTestcase) + for tcName, tc := range testcases { + canChild := true + for _, res := range tc.Exp { + if res.s == pushReadBarrier { + canChild = false + break + } } - } - childTestcases[tcName] = tc - } - - t.Run("top-level", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, toplevelTestcases, func(reader io.RuneScanner) runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + if !canChild { + continue } - }) - }) - t.Run("child", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, childTestcases, func(reader io.RuneScanner) runeTypeScanner { - inner := &runeTypeScannerImpl{ - inner: reader, + tc.Input = `[1,` + tc.Input + tc.Exp = append([]ReadRuneTypeResult{ + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + }, tc.Exp...) 
+ for i := 2; i < len(tc.Exp); i++ { + if se, ok := tc.Exp[i].e.(*DecodeSyntaxError); ok { + seCopy := *se + seCopy.Offset += 3 + tc.Exp[i].e = &seCopy + } } - var res ReadRuneTypeResult - res.r, res.s, res.t, res.e = inner.ReadRuneType() - require.Equal(t, - ReadRuneTypeResult{'[', 1, internal.RuneTypeArrayBeg, nil}.String(), - res.String()) - - return &elemRuneTypeScanner{ - inner: inner, + childTestcases["child-"+tcName] = tc + } + for tcName, tc := range childTestcases { + testcases[tcName] = tc + } + }() + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + t.Logf("input=%q", tc.Input) + reader := strings.NewReader(tc.Input) + sc := &runeTypeScanner{inner: reader} + var exp, act []string + for _, iExp := range tc.Exp { + var iAct ReadRuneTypeResult + switch iExp.s { + case unreadRune: + iAct.s = iExp.s + iAct.e = sc.UnreadRune() + case pushReadBarrier: + sc.PushReadBarrier() + iAct.s = iExp.s + case popReadBarrier: + sc.PopReadBarrier() + iAct.s = iExp.s + case reset: + sc.Reset() + iAct.s = iExp.s + default: + iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() + } + exp = append(exp, iExp.String()) + act = append(act, iAct.String()) } + assert.Equal(t, exp, act) + assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) }) - }) -} - -func TestElemRuneTypeScanner2(t *testing.T) { - t.Parallel() - parent := &runeTypeScannerImpl{ - inner: strings.NewReader(` { "foo" : 12.0 } `), - } - exp := []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - } - expStr := make([]string, 0, len(exp)) - actStr := make([]string, 0, len(exp)) - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e 
= parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - child := &elemRuneTypeScanner{ - inner: parent, - } - exp = []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = child.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - exp = []ReadRuneTypeResult{ - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) } } diff --git a/encode.go b/encode.go index 5fb4fbf..ca4e060 100644 --- a/encode.go +++ b/encode.go @@ -9,17 +9,13 @@ import ( "encoding" "encoding/base64" "encoding/json" - "errors" "fmt" "io" - iofs "io/fs" "reflect" "sort" "strconv" "strings" "unsafe" - - "git.lukeshu.com/go/lowmemjson/internal" ) // Encodable is the interface implemented by types that can encode @@ -98,7 +94,7 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { +func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { if !val.IsValid() { return discardInt(w.WriteString("null")) } @@ -115,22 +111,22 @@ func encode(w 
internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q if !ok { return discardInt(w.WriteString("null")) } - // Use a sub-ReEncoder to check that it's a full element. - validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} - if err := obj.EncodeJSON(validator); err != nil { + w.pushWriteBarrier() + if err := obj.EncodeJSON(w); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, } } - if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) { + if err := w.Close(); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType): val = val.Addr() @@ -151,22 +147,22 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q Err: err, } } - // Use a sub-ReEncoder to check that it's a full element. 
- validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} - if _, err := validator.Write(dat); err != nil { + w.pushWriteBarrier() + if _, err := w.Write(dat); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, } } - if err := validator.Close(); err != nil { + if err := w.Close(); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType): val = val.Addr() @@ -361,7 +357,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q for i := 0; iter.Next(); i++ { // TODO: Avoid buffering the map key var k strings.Builder - if err := encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { + if err := encode(NewReEncoder(&k, ReEncoderConfig{BackslashEscape: escaper}), iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { return err } kStr := k.String() @@ -496,7 +492,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q return nil } -func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { +func encodeArray(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { if err := w.WriteByte('['); err != nil { return err } diff --git a/internal/parse.go b/internal/parse.go index 9db57fb..36db4a9 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -313,6 +313,13 @@ type Parser struct { // a ["x","y" // ["x","y"] stack []RuneType + + barriers []barrier +} + +type barrier struct { + closed bool + stack []RuneType } func (par *Parser) init() { @@ -345,8 +352,22 @@ func (par *Parser) stackString() string { return buf.String() } +func (par *Parser) depth() int { + n := len(par.stack) + for _, 
barrier := range par.barriers { + n += len(barrier.stack) + } + return n +} + func (par *Parser) StackIsEmpty() bool { - return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny) + if len(par.barriers) > 0 { + return false + } + if len(par.stack) == 0 { + return true + } + return len(par.stack) == 1 && par.stack[0] == runeTypeAny } func (par *Parser) StackSize() int { @@ -360,6 +381,99 @@ func (par *Parser) Reset() { } } +// PushReadBarrier causes the parser to expect EOF once the end of the +// element that is started by the current top-of-stack is reached, +// until this is un-done with PopBarrier. It essentially turns the +// parser in to a sub-parser. +// +// PushReadBarrier may only be called at the beginning of an element, +// whether that be +// +// - runeTypeAny +// - RuneTypeObjectBeg +// - RuneTypeArrayBeg +// - RuneTypeStringBeg +// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig +// - RuneTypeTrueT +// - RuneTypeFalseF +// - RuneTypeNullN +func (par *Parser) PushReadBarrier() { + // Sanity checking. + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushReadBarrier call: empty stack")) + } + curState := par.stack[len(par.stack)-1] + switch curState { + case runeTypeAny, + RuneTypeObjectBeg, + RuneTypeArrayBeg, + RuneTypeStringBeg, + RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, + RuneTypeTrueT, + RuneTypeFalseF, + RuneTypeNullN: + // OK + default: + panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) + } + // Actually push. + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack[:len(par.stack)-1], + }) + par.stack = []RuneType{curState} +} + +// PushWriteBarrier causes the parser to expect EOF once the end of +// the about-to-start element is reached, until this is un-done with +// PopBarrier. It essentially turns the parser in to a sub-parser. 
+// +// PushWriteBarrier may only be called at the places where an element +// of any type may start: +// +// - runeTypeAny for top-level and object-value elements +// - RuneTypeArrayBeg for array-item elements +// +// PushWriteBarrier signals intent to write an element; if it is +// called in a place where an element is optional (at the beginning of +// an array), it becomes a syntax error to not write the element. +func (par *Parser) PushWriteBarrier() { + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushWriteBarrier call: empty stack")) + } + switch par.stack[len(par.stack)-1] { + case runeTypeAny: + par.popState() + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + case RuneTypeArrayBeg: + par.replaceState(RuneTypeArrayComma) + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + default: + panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) + } +} + +// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. +func (par *Parser) PopBarrier() { + if len(par.barriers) == 0 { + panic(errors.New("illegal PopBarrier call: empty barrier stack")) + } + barrier := par.barriers[len(par.barriers)-1] + par.barriers = par.barriers[:len(par.barriers)-1] + par.closed = barrier.closed + par.stack = append(barrier.stack, par.stack...) +} + // HandleEOF feeds EOF to the Parser. The returned RuneType is either // RuneTypeEOF or RuneTypeError. 
// @@ -439,12 +553,12 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '{': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeObjectBeg), nil case '[': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeArrayBeg), nil diff --git a/reencode.go b/reencode.go index a5dc3c8..eae80db 100644 --- a/reencode.go +++ b/reencode.go @@ -106,6 +106,9 @@ type ReEncoder struct { fracZeros int64 expZero bool specu *speculation + + // state: .pushWriteBarrier and .popWriteBarrier + stackInputPos []int64 } type speculation struct { @@ -227,7 +230,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() } return nil @@ -264,7 +267,7 @@ rehandle: } enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() goto rehandle } else { @@ -280,6 +283,20 @@ rehandle: return enc.written, enc.err } +// semi-public API ///////////////////////////////////////////////////////////// + +func (enc *ReEncoder) pushWriteBarrier() { + enc.par.PushWriteBarrier() + enc.stackInputPos = append(enc.stackInputPos, enc.inputPos) + enc.inputPos = 0 +} + +func (enc *ReEncoder) popWriteBarrier() { + enc.par.PopBarrier() + enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1] + enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1] +} + // internal //////////////////////////////////////////////////////////////////// func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize 
int) error { @@ -503,7 +520,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines: + case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0: t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) err = enc.emitByte('\n') default: -- cgit v1.2.3-2-g168b From ccf8dc4b21bb1a547f118affab22bca3a02df270 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 30 Jan 2023 22:05:03 -0700 Subject: ReleaseNotes: Add an entry for this branch's changes --- ReleaseNotes.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index f1fccfb..bb366f3 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,3 +1,14 @@ +# v0.3.1 (TBD) + + Theme: Performance + + This release does a bunch of performance tuning and optimizations, + with no user-visible changes other than memory consumption and CPU + time. Based on benchmarks with a real-world use-case, it is now + roughly an order of magnitude faster, with much lower memory + consumption (the big-O of memory consumption was always pretty low, + but there were some big constant factors before). + # v0.3.0 (2023-01-30) Theme: Breaking changes -- cgit v1.2.3-2-g168b