diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-18 22:45:30 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-18 22:45:30 -0700 |
commit | cf75ff06887d7bbb5bbbd682587b9d3f5e474670 (patch) | |
tree | 7f8999f73fa4e4c5759a174cebed7c8919fa05e1 | |
parent | 1b773c966cdb6b38cc4bc73e218793db3a9442ba (diff) | |
parent | ec0482f598f4b7b05c21b1d19fe8183665e3fe93 (diff) |
Merge branch 'lukeshu/scan-tidy'
-rw-r--r-- | compat/json/compat.go | 3 | ||||
-rw-r--r-- | compat/json/testcompat_test.go | 4 | ||||
-rw-r--r-- | decode_scan.go | 97 | ||||
-rw-r--r-- | decode_scan_test.go | 92 | ||||
-rw-r--r-- | internal/fastio/allwriter.go | 3 | ||||
-rw-r--r-- | internal/jsonparse/parse.go | 20 |
6 files changed, 153 insertions, 66 deletions
diff --git a/compat/json/compat.go b/compat/json/compat.go index 0b86732..c96470d 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -15,7 +15,6 @@ import ( "strconv" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" ) //nolint:stylecheck // ST1021 False positive; these aren't comments on individual types. @@ -173,7 +172,7 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { } func Valid(data []byte) bool { - formatter := lowmemjson.NewReEncoder(fastio.Discard, lowmemjson.ReEncoderConfig{ + formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{ Compact: true, }) _, err := formatter.Write(data) diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index c186678..42cbf5c 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -7,10 +7,10 @@ package json import ( "bytes" "encoding/json" + "io" _ "unsafe" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" @@ -25,7 +25,7 @@ var ( type scanner = lowmemjson.ReEncoderConfig func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { - return reencode(fastio.Discard, in, *scan) + return reencode(io.Discard, in, *scan) } func isValidNumber(s string) bool { diff --git a/decode_scan.go b/decode_scan.go index 7a52975..7911c01 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -19,8 +19,8 @@ type runeTypeScanner struct { parser jsonparse.Parser // initialized by constructor offset int64 - initialized bool - repeat bool + rTypeOK bool + repeat bool rRune rune rSize int @@ -28,28 +28,6 @@ type runeTypeScanner struct { rErr error } -func (sc *runeTypeScanner) Reset() { - sc.parser.Reset() - if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { - sc.repeat = false - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - } else { - sc.initialized = false - } -} - // The returned error is a *ReadError, a *SyntaxError, or nil. // An EOF condition is represented as one of: // @@ -59,12 +37,13 @@ func (sc *runeTypeScanner) Reset() { // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { switch { - case sc.initialized && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): + case sc.rTypeOK && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): // do nothing case sc.repeat: + sc.offset += int64(sc.rSize) _, _, _ = sc.inner.ReadRune() default: - sc.initialized = true + sc.rTypeOK = true again: var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() @@ -105,26 +84,23 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) return sc.rRune, sc.rSize, sc.rType, sc.rErr } -// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). +// UnreadRune undoes a call to .ReadRuneType(). // -// If the last call to .ReadRune() or .ReadRuneType() has already been -// unread, or if that call returned a rune with size 0, then -// ErrInvalidUnreadRune is returned. Otherwise, nil is returned. +// If the last call to .ReadRuneType() has already been unread, or if +// that call returned a rune with size 0, then ErrInvalidUnreadRune is +// returned. Otherwise, nil is returned. func (sc *runeTypeScanner) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } sc.repeat = true + sc.offset -= int64(sc.rSize) _ = sc.inner.UnreadRune() return nil } func (sc *runeTypeScanner) InputOffset() int64 { - ret := sc.offset - if sc.repeat { - ret -= int64(sc.rSize) - } - return ret + return sc.offset } func (sc *runeTypeScanner) PushReadBarrier() { @@ -133,7 +109,20 @@ func (sc *runeTypeScanner) PushReadBarrier() { func (sc *runeTypeScanner) PopReadBarrier() { sc.parser.PopBarrier() - if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { + switch { + case sc.repeat: + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -147,7 +136,7 @@ func (sc *runeTypeScanner) PopReadBarrier() { } // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed - } else if sc.rType == jsonparse.RuneTypeEOF { + case sc.rType == jsonparse.RuneTypeEOF: // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleEOF() @@ -161,3 +150,37 @@ func (sc *runeTypeScanner) PopReadBarrier() { } } } + +func (sc *runeTypeScanner) Reset() { + sc.parser.Reset() + switch { + case sc.repeat: + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, + } + } else { + sc.rErr = nil + } + // tell it to use that rType and rErr + _ = sc.UnreadRune() // we set it up to always succeed + default: + sc.rTypeOK = false + } +} diff --git a/decode_scan_test.go b/decode_scan_test.go index 1d61157..eaf2f37 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -106,19 +106,16 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, - {'"', 1, jsonparse.RuneTypeStringBeg, nil}, - {'f', 1, jsonparse.RuneTypeStringChar, nil}, - {'o', 1, jsonparse.RuneTypeStringChar, nil}, - {'o', 1, jsonparse.RuneTypeStringChar, nil}, - {'"', 1, jsonparse.RuneTypeStringEnd, nil}, - {':', 1, jsonparse.RuneTypeObjectColon, nil}, + "unread-eof": {`[1,2]`, ``, []ReadRuneTypeResult{ + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, - {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {']', 1, jsonparse.RuneTypeEOF, nil}, + {0, unreadRune, 0, nil}, + {0, popReadBarrier, 0, nil}, + {']', 1, jsonparse.RuneTypeArrayEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, 0, jsonparse.RuneTypeEOF, nil}, @@ -149,11 +146,32 @@ func TestRuneTypeScanner(t *testing.T) { {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, - "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ + "multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + }}, + "multi-value2": {`1{}`, ``, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, reset, 0, nil}, + {0, pushReadBarrier, 0, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "early-eof": {` {`, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, @@ -173,6 +191,22 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "fragment": {`1,`, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + }}, + "child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {',', 1, jsonparse.RuneTypeEOF, nil}, {',', 1, jsonparse.RuneTypeEOF, nil}, @@ -198,13 +232,45 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, + "invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{ + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'2', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'.', 1, jsonparse.RuneTypeEOF, nil}, + {0, reset, 0, nil}, + {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}}, + }}, + "trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{ + {0, pushReadBarrier, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, unreadRune, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, unreadRune, 0, nil}, + {0, pushReadBarrier, 0, nil}, + {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'x', 1, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + {'x', 1, jsonparse.RuneTypeEOF, nil}, + }}, + "unread-reset": {`{}`, ``, []ReadRuneTypeResult{ + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {0, unreadRune, 0, nil}, + {0, reset, 0, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, } func() { childTestcases := make(map[string]runeTypeScannerTestcase) for tcName, tc := range testcases { canChild := true for _, res := range tc.Exp { - if res.s == pushReadBarrier { + if res.s == pushReadBarrier || res.s == reset { canChild = false break } diff --git a/internal/fastio/allwriter.go b/internal/fastio/allwriter.go index c587531..071d709 100644 --- a/internal/fastio/allwriter.go +++ b/internal/fastio/allwriter.go @@ -139,6 +139,9 @@ func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteStrin // the io.Writer already has any of the other write methods, then its // native version of those methods are used. func NewAllWriter(inner io.Writer) AllWriter { + if inner == io.Discard { + return Discard + } switch inner := inner.(type) { // 3 Y bits case AllWriter: // YYY: diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 2f5c1ab..d867cbc 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -324,8 +324,7 @@ type Parser struct { } type barrier struct { - closed bool - stack []RuneType + stack []RuneType } func (par *Parser) init() { @@ -387,7 +386,7 @@ func (par *Parser) Reset() { } } -// PushReadBarrier causes the parser to expect EOF once the end of the +// PushReadBarrier causes the parser to emit EOF once the end of the // element that is started by the current top-of-stack is reached, // until this is un-done with PopBarrier. It essentially turns the // parser in to a sub-parser. @@ -425,14 +424,13 @@ func (par *Parser) PushReadBarrier() { } // Actually push. par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack[:len(par.stack)-1], + stack: par.stack[:len(par.stack)-1], }) par.stack = []RuneType{curState} } -// PushWriteBarrier causes the parser to expect EOF once the end of -// the about-to-start element is reached, until this is un-done with +// PushWriteBarrier causes the parser to emit EOF once the end of the +// about-to-start element is reached, until this is un-done with // PopBarrier. It essentially turns the parser in to a sub-parser. // // PushWriteBarrier may only be called at the places where an element @@ -453,15 +451,13 @@ func (par *Parser) PushWriteBarrier() { case runeTypeAny: par.popState() par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} case RuneTypeArrayBeg: par.replaceState(RuneTypeArrayComma) par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} default: @@ -476,7 +472,7 @@ func (par *Parser) PopBarrier() { } barrier := par.barriers[len(par.barriers)-1] par.barriers = par.barriers[:len(par.barriers)-1] - par.closed = barrier.closed + par.closed = false par.stack = append(barrier.stack, par.stack...) } |