From b2b51abfdc2eaefe6cf4aaf8645bbc7c52d89ff9 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 14:54:36 -0700 Subject: fastio: NewAllWriter: Add a special case for io.Discard --- compat/json/compat.go | 3 +-- compat/json/testcompat_test.go | 4 ++-- internal/fastio/allwriter.go | 3 +++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/compat/json/compat.go b/compat/json/compat.go index 0b86732..c96470d 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -15,7 +15,6 @@ import ( "strconv" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" ) //nolint:stylecheck // ST1021 False positive; these aren't comments on individual types. @@ -173,7 +172,7 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { } func Valid(data []byte) bool { - formatter := lowmemjson.NewReEncoder(fastio.Discard, lowmemjson.ReEncoderConfig{ + formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{ Compact: true, }) _, err := formatter.Write(data) diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index c186678..42cbf5c 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -7,10 +7,10 @@ package json import ( "bytes" "encoding/json" + "io" _ "unsafe" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" @@ -25,7 +25,7 @@ var ( type scanner = lowmemjson.ReEncoderConfig func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { - return reencode(fastio.Discard, in, *scan) + return reencode(io.Discard, in, *scan) } func isValidNumber(s string) bool { diff --git a/internal/fastio/allwriter.go b/internal/fastio/allwriter.go index c587531..071d709 100644 --- a/internal/fastio/allwriter.go +++ b/internal/fastio/allwriter.go @@ -139,6 +139,9 @@ func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteStrin // the io.Writer already has any of the other write methods, then its // native version of those methods are used. func NewAllWriter(inner io.Writer) AllWriter { + if inner == io.Discard { + return Discard + } switch inner := inner.(type) { // 3 Y bits case AllWriter: // YYY: -- cgit v1.1-4-g5e80 From b24da028a13dc6156367dcf933eb16da4eea5663 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 12:35:36 -0700 Subject: jsonparse: Remove barrier.closed, as it is always false --- internal/jsonparse/parse.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 2f5c1ab..7afd6a1 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -324,8 +324,7 @@ type Parser struct { } type barrier struct { - closed bool - stack []RuneType + stack []RuneType } func (par *Parser) init() { @@ -425,8 +424,7 @@ func (par *Parser) PushReadBarrier() { } // Actually push. par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack[:len(par.stack)-1], + stack: par.stack[:len(par.stack)-1], }) par.stack = []RuneType{curState} } @@ -453,15 +451,13 @@ func (par *Parser) PushWriteBarrier() { case runeTypeAny: par.popState() par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} case RuneTypeArrayBeg: par.replaceState(RuneTypeArrayComma) par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} default: @@ -476,7 +472,7 @@ func (par *Parser) PopBarrier() { } barrier := par.barriers[len(par.barriers)-1] par.barriers = par.barriers[:len(par.barriers)-1] - par.closed = barrier.closed + par.closed = false par.stack = append(barrier.stack, par.stack...) } -- cgit v1.1-4-g5e80 From a0f2419af917ecc0f91a50b7fb8424615adf9237 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 18:21:08 -0700 Subject: jsonparse: Fix a mistake in the comments --- internal/jsonparse/parse.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 7afd6a1..d867cbc 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -386,7 +386,7 @@ func (par *Parser) Reset() { } } -// PushReadBarrier causes the parser to expect EOF once the end of the +// PushReadBarrier causes the parser to emit EOF once the end of the // element that is started by the current top-of-stack is reached, // until this is un-done with PopBarrier. It essentially turns the // parser in to a sub-parser. @@ -429,8 +429,8 @@ func (par *Parser) PushReadBarrier() { par.stack = []RuneType{curState} } -// PushWriteBarrier causes the parser to expect EOF once the end of -// the about-to-start element is reached, until this is un-done with +// PushWriteBarrier causes the parser to emit EOF once the end of the +// about-to-start element is reached, until this is un-done with // PopBarrier. It essentially turns the parser in to a sub-parser. // // PushWriteBarrier may only be called at the places where an element -- cgit v1.1-4-g5e80 From 0c112bc16d6970db1bee35710f6c8b0622510663 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 15:05:42 -0700 Subject: decode_scan: Move Reset() to be by PopReadBarrier() --- decode_scan.go | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index 7a52975..b0fc7c3 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -28,28 +28,6 @@ type runeTypeScanner struct { rErr error } -func (sc *runeTypeScanner) Reset() { - sc.parser.Reset() - if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { - sc.repeat = false - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - } else { - sc.initialized = false - } -} - // The returned error is a *ReadError, a *SyntaxError, or nil. // An EOF condition is represented as one of: // @@ -161,3 +139,25 @@ func (sc *runeTypeScanner) PopReadBarrier() { } } } + +func (sc *runeTypeScanner) Reset() { + sc.parser.Reset() + if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { + sc.repeat = false + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + // tell it to use that rType and rErr + _ = sc.UnreadRune() // we set it up to always succeed + } else { + sc.initialized = false + } +} -- cgit v1.1-4-g5e80 From 595249c55c24828c04ab682dd3b35883b74aa790 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 17:25:21 -0700 Subject: decode_scan: PopReadBarrier, Reset: De-couple the .repeat and EOF cases This duplicates code, but it's confusing thinking about them together. --- decode_scan.go | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index b0fc7c3..507bc42 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -111,7 +111,20 @@ func (sc *runeTypeScanner) PushReadBarrier() { func (sc *runeTypeScanner) PopReadBarrier() { sc.parser.PopBarrier() - if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { + switch { + case sc.repeat: + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -125,7 +138,7 @@ func (sc *runeTypeScanner) PopReadBarrier() { } // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed - } else if sc.rType == jsonparse.RuneTypeEOF { + case sc.rType == jsonparse.RuneTypeEOF: // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleEOF() @@ -142,8 +155,20 @@ func (sc *runeTypeScanner) PopReadBarrier() { func (sc *runeTypeScanner) Reset() { sc.parser.Reset() - if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { - sc.repeat = false + switch { + case sc.repeat: + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -157,7 +182,7 @@ func (sc *runeTypeScanner) Reset() { } // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed - } else { + default: sc.initialized = false } } -- cgit v1.1-4-g5e80 From 113aa7184e1d2f3858e68301dd1c2a6de51e6c1a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 18:00:24 -0700 Subject: decode_scan: Go ahead and decrement sc.offset when unreading --- decode_scan.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index 507bc42..85f3190 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -40,6 +40,7 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) case sc.initialized && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): // do nothing case sc.repeat: + sc.offset += int64(sc.rSize) _, _, _ = sc.inner.ReadRune() default: sc.initialized = true @@ -93,16 +94,13 @@ func (sc *runeTypeScanner) UnreadRune() error { return ErrInvalidUnreadRune } sc.repeat = true + sc.offset -= int64(sc.rSize) _ = sc.inner.UnreadRune() return nil } func (sc *runeTypeScanner) InputOffset() int64 { - ret := sc.offset - if sc.repeat { - ret -= int64(sc.rSize) - } - return ret + return sc.offset } func (sc *runeTypeScanner) PushReadBarrier() { -- cgit v1.1-4-g5e80 From c4c88aa65cfac511fcc830a7807da6ac9bde4d0a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 17:55:32 -0700 Subject: decode_scan: Add a bunch of tests --- decode_scan_test.go | 92 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 13 deletions(-) diff --git a/decode_scan_test.go b/decode_scan_test.go index 1d61157..eaf2f37 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -106,19 +106,16 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, - {'"', 1, jsonparse.RuneTypeStringBeg, nil}, - {'f', 1, jsonparse.RuneTypeStringChar, nil}, - {'o', 1, jsonparse.RuneTypeStringChar, nil}, - {'o', 1, jsonparse.RuneTypeStringChar, nil}, - {'"', 1, jsonparse.RuneTypeStringEnd, nil}, - {':', 1, jsonparse.RuneTypeObjectColon, nil}, + "unread-eof": {`[1,2]`, ``, []ReadRuneTypeResult{ + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, - {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {']', 1, jsonparse.RuneTypeEOF, nil}, + {0, unreadRune, 0, nil}, + {0, popReadBarrier, 0, nil}, + {']', 1, jsonparse.RuneTypeArrayEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, 0, jsonparse.RuneTypeEOF, nil}, @@ -149,11 +146,32 @@ func TestRuneTypeScanner(t *testing.T) { {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, - "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ + "multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + }}, + "multi-value2": {`1{}`, ``, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, reset, 0, nil}, + {0, pushReadBarrier, 0, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "early-eof": {` {`, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, @@ -173,6 +191,22 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "fragment": {`1,`, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + }}, + "child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {',', 1, jsonparse.RuneTypeEOF, nil}, {',', 1, jsonparse.RuneTypeEOF, nil}, @@ -198,13 +232,45 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, + "invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{ + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'2', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'.', 1, jsonparse.RuneTypeEOF, nil}, + {0, reset, 0, nil}, + {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}}, + }}, + "trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, unreadRune, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, unreadRune, 0, nil}, + {0, pushReadBarrier, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'x', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + {'x', 1, jsonparse.RuneTypeEOF, nil}, + }}, + "unread-reset": {`{}`, ``, []ReadRuneTypeResult{ + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {0, unreadRune, 0, nil}, + {0, reset, 0, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, } func() { childTestcases := make(map[string]runeTypeScannerTestcase) for tcName, tc := range testcases { canChild := true for _, res := range tc.Exp { - if res.s == pushReadBarrier { + if res.s == pushReadBarrier || res.s == reset { canChild = false break } -- cgit v1.1-4-g5e80 From 56e05985f999e9f3d8cda96026f5a2f68d28d016 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 18:07:16 -0700 Subject: decode_scan: s/initialized/rTypeOK/ --- decode_scan.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index 85f3190..1f29a95 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -19,8 +19,8 @@ type runeTypeScanner struct { parser jsonparse.Parser // initialized by constructor offset int64 - initialized bool - repeat bool + rTypeOK bool + repeat bool rRune rune rSize int @@ -37,13 +37,13 @@ type runeTypeScanner struct { // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { switch { - case sc.initialized && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): + case sc.rTypeOK && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): // do nothing case sc.repeat: sc.offset += int64(sc.rSize) _, _, _ = sc.inner.ReadRune() default: - sc.initialized = true + sc.rTypeOK = true again: var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() @@ -181,6 +181,6 @@ func (sc *runeTypeScanner) Reset() { // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed default: - sc.initialized = false + sc.rTypeOK = false } } -- cgit v1.1-4-g5e80 From ec0482f598f4b7b05c21b1d19fe8183665e3fe93 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 22:45:18 -0700 Subject: decode_scan: Fix a doc comment --- decode_scan.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/decode_scan.go b/decode_scan.go index 1f29a95..7911c01 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -84,11 +84,11 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) return sc.rRune, sc.rSize, sc.rType, sc.rErr } -// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). +// UnreadRune undoes a call to .ReadRuneType(). // -// If the last call to .ReadRune() or .ReadRuneType() has already been -// unread, or if that call returned a rune with size 0, then -// ErrInvalidUnreadRune is returned. Otherwise, nil is returned. +// If the last call to .ReadRuneType() has already been unread, or if +// that call returned a rune with size 0, then ErrInvalidUnreadRune is +// returned. Otherwise, nil is returned. func (sc *runeTypeScanner) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune -- cgit v1.1-4-g5e80