diff options
| author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-18 21:57:39 -0700 | 
|---|---|---|
| committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-18 22:45:39 -0700 | 
| commit | edfc7aa91b542978ce28eb109b99a257650b62b4 (patch) | |
| tree | 3bc300355b7843a2c2ba3a0ec5db4346cdc87965 | |
| parent | cf75ff06887d7bbb5bbbd682587b9d3f5e474670 (diff) | |
decode_scan, jsonparse: Rework the behavior to make a bit more sense
 - decode_scan: Don't have .Reset() re-play an erroring rune
 - decode_scan: Have RuneTypeEOF always be zero-width
 - jsonparse: Don't replace syntax errors with RuneTypeEOF if there's no barrier
| -rw-r--r-- | decode.go | 18 | ||||
| -rw-r--r-- | decode_scan.go | 71 | ||||
| -rw-r--r-- | decode_scan_test.go | 47 | ||||
| -rw-r--r-- | internal/jsonparse/parse.go | 6 | ||||
| -rw-r--r-- | reencode.go | 17 | 
5 files changed, 56 insertions, 103 deletions
| @@ -207,19 +207,17 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) {  	if err := dec.Decode(ptr); err != nil {  		return err  	} -	c, s, t, _ := dec.io.ReadRuneType() -	if t != jsonparse.RuneTypeEOF { -		panic(fmt.Errorf("should not happen: .ReadRuneType returned non-EOF after decode without .Reset being called: %v", t)) -	} -	if s > 0 { +	_, _, t, err := dec.io.ReadRuneType() +	switch t { +	case jsonparse.RuneTypeError:  		return &DecodeError{ -			Err: &DecodeSyntaxError{ -				Err:    fmt.Errorf("invalid character %q after top-level value", c), -				Offset: dec.InputOffset(), -			}, +			Err: err,  		} +	case jsonparse.RuneTypeEOF: +		return nil +	default: +		panic(fmt.Errorf("should not happen: .ReadRuneType returned non-error non-EOF after decode without .Reset being called: '%v'", t))  	} -	return nil  }  // Decode reads the next JSON element from the Decoder's input stream diff --git a/decode_scan.go b/decode_scan.go index 7911c01..fcf47ff 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -31,8 +31,7 @@ type runeTypeScanner struct {  // The returned error is a *ReadError, a *SyntaxError, or nil.  
// An EOF condition is represented as one of:  // -//	end of value but not file:      (_, >0, RuneTypeEOF, nil) -//	end of both value and file:     (_, 0, RuneTypeEOF, nil) +//	end of value:                   (_, 0, RuneTypeEOF, nil)  //	end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})  //	end of file at start of value:  (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF})  func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { @@ -59,8 +58,14 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)  			} else {  				sc.rErr = nil  			} -			if sc.rType == jsonparse.RuneTypeSpace { +			switch sc.rType { +			case jsonparse.RuneTypeSpace:  				goto again +			case jsonparse.RuneTypeEOF: +				sc.offset -= int64(sc.rSize) +				sc.rRune = 0 +				sc.rSize = 0 +				_ = sc.inner.UnreadRune()  			}  		case io.EOF:  			sc.rType, err = sc.parser.HandleEOF() @@ -122,65 +127,13 @@ func (sc *runeTypeScanner) PopReadBarrier() {  		} else {  			sc.rErr = nil  		} -	case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: -		// re-figure the rType and rErr -		var err error -		sc.rType, err = sc.parser.HandleRune(sc.rRune) -		if err != nil { -			sc.rErr = &DecodeSyntaxError{ -				Offset: sc.offset - int64(sc.rSize), -				Err:    err, -			} -		} else { -			sc.rErr = nil -		} -		// tell it to use that rType and rErr -		_ = sc.UnreadRune() // we set it up to always succeed -	case sc.rType == jsonparse.RuneTypeEOF: -		// re-figure the rType and rErr -		var err error -		sc.rType, err = sc.parser.HandleEOF() -		if err != nil { -			sc.rErr = &DecodeSyntaxError{ -				Offset: sc.offset, -				Err:    err, -			} -		} else { -			sc.rErr = nil -		} +	case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF: +		sc.rTypeOK = false // forget the sticky EOF  	}  }  func (sc *runeTypeScanner) Reset() {  	sc.parser.Reset() -	switch { -	case sc.repeat: -		// re-figure the 
rType and rErr -		var err error -		sc.rType, err = sc.parser.HandleRune(sc.rRune) -		if err != nil { -			sc.rErr = &DecodeSyntaxError{ -				Offset: sc.offset - int64(sc.rSize), -				Err:    err, -			} -		} else { -			sc.rErr = nil -		} -	case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: -		// re-figure the rType and rErr -		var err error -		sc.rType, err = sc.parser.HandleRune(sc.rRune) -		if err != nil { -			sc.rErr = &DecodeSyntaxError{ -				Offset: sc.offset - int64(sc.rSize), -				Err:    err, -			} -		} else { -			sc.rErr = nil -		} -		// tell it to use that rType and rErr -		_ = sc.UnreadRune() // we set it up to always succeed -	default: -		sc.rTypeOK = false -	} +	sc.rTypeOK = false // forget any sticky errors/EOF +	sc.repeat = false  // feed the rune (if any) through the parser again  } diff --git a/decode_scan_test.go b/decode_scan_test.go index eaf2f37..17c40d5 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -112,8 +112,8 @@ func TestRuneTypeScanner(t *testing.T) {  			{',', 1, jsonparse.RuneTypeArrayComma, nil},  			{0, pushReadBarrier, 0, nil},  			{'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{']', 1, jsonparse.RuneTypeEOF, nil}, -			{0, unreadRune, 0, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, unreadRune, 0, ErrInvalidUnreadRune},  			{0, popReadBarrier, 0, nil},  			{']', 1, jsonparse.RuneTypeArrayEnd, nil},  			{0, 0, jsonparse.RuneTypeEOF, nil}, @@ -149,17 +149,17 @@ func TestRuneTypeScanner(t *testing.T) {  		"multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{  			{0, pushReadBarrier, 0, nil},  			{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  			{0, popReadBarrier, 0, nil},  		}},  		"multi-value2": {`1{}`, ``, []ReadRuneTypeResult{  			{0, pushReadBarrier, 0, nil},  	
		{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, -			{'{', 1, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  			{0, popReadBarrier, 0, nil},  			{0, reset, 0, nil},  			{0, pushReadBarrier, 0, nil}, @@ -196,11 +196,11 @@ func TestRuneTypeScanner(t *testing.T) {  			{0, popReadBarrier, 0, nil},  			// Test main.  			{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, +			{',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, +			{',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, +			{',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}},  		}}, -		"child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{ +		"child-fragment": {`[1,` + `1,`, `,`, []ReadRuneTypeResult{  			// Child prefix.  			{'[', 1, jsonparse.RuneTypeArrayBeg, nil},  			{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, @@ -208,9 +208,9 @@ func TestRuneTypeScanner(t *testing.T) {  			{0, pushReadBarrier, 0, nil},  			// Test main.  			
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, -			{',', 1, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  		}},  		"elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{  			{'{', 1, jsonparse.RuneTypeObjectBeg, nil}, @@ -225,22 +225,23 @@ func TestRuneTypeScanner(t *testing.T) {  			{'2', 1, jsonparse.RuneTypeNumberIntDig, nil},  			{'.', 1, jsonparse.RuneTypeNumberFracDot, nil},  			{'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, -			{'}', 1, jsonparse.RuneTypeEOF, nil}, -			{'}', 1, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  			{0, popReadBarrier, 0, nil},  			{'}', 1, jsonparse.RuneTypeObjectEnd, nil},  			{0, 0, jsonparse.RuneTypeEOF, nil},  			{0, 0, jsonparse.RuneTypeEOF, nil},  		}}, -		"invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{ +		"invalid-number": {`1.2.3`, ``, []ReadRuneTypeResult{  			{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},  			{'.', 1, jsonparse.RuneTypeNumberFracDot, nil},  			{'2', 1, jsonparse.RuneTypeNumberFracDig, nil}, -			{'.', 1, jsonparse.RuneTypeEOF, nil}, +			{'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q after top-level value", '.')}},  			{0, reset, 0, nil}, -			{'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}}, +			{'3', 1, jsonparse.RuneTypeNumberIntDig, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  		}}, -		"trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{ +		"trailing-garbage": {" 42 x", ``, []ReadRuneTypeResult{  			{0, pushReadBarrier, 0, nil},  			{'4', 1, jsonparse.RuneTypeNumberIntDig, nil},  			{0, unreadRune, 0, nil}, @@ -249,10 +250,10 @@ func TestRuneTypeScanner(t *testing.T) {  			{0, pushReadBarrier, 
0, nil},  			{'4', 1, jsonparse.RuneTypeNumberIntDig, nil},  			{'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, -			{'x', 1, jsonparse.RuneTypeEOF, nil}, +			{0, 0, jsonparse.RuneTypeEOF, nil},  			{0, popReadBarrier, 0, nil},  			{0, popReadBarrier, 0, nil}, -			{'x', 1, jsonparse.RuneTypeEOF, nil}, +			{'x', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q after top-level value", 'x')}},  		}},  		"unread-reset": {`{}`, ``, []ReadRuneTypeResult{  			{'{', 1, jsonparse.RuneTypeObjectBeg, nil}, diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index d867cbc..06efc8c 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -545,7 +545,11 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {  		case 0x0020, 0x000A, 0x000D, 0x0009:  			return RuneTypeSpace, nil  		default: -			return RuneTypeEOF, nil +			if len(par.barriers) > 0 { +				return RuneTypeEOF, nil +			} else { +				return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) +			}  		}  	}  	switch par.stack[len(par.stack)-1] { diff --git a/reencode.go b/reencode.go index 7e9b5ff..0745c43 100644 --- a/reencode.go +++ b/reencode.go @@ -276,7 +276,6 @@ func (enc *ReEncoder) Close() error {  }  func (enc *ReEncoder) handleRune(c rune, size int) { -rehandle:  	t, err := enc.par.HandleRune(c)  	if err != nil {  		enc.err = &ReEncodeSyntaxError{ @@ -293,16 +292,14 @@ rehandle:  		return  	}  	if t == jsonparse.RuneTypeEOF { -		if enc.allowMultipleValues && len(enc.barriers) == 0 { -			enc.par.Reset() -			goto rehandle -		} else { -			enc.err = &ReEncodeSyntaxError{ -				Err:    fmt.Errorf("invalid character %q after top-level value", c), -				Offset: enc.inputPos, -			} -			return +		if len(enc.barriers) == 0 { +			panic(fmt.Errorf("should not happen: EOF for rune %q without write barriers", c))  		} +		enc.err = &ReEncodeSyntaxError{ +			Err:    fmt.Errorf("invalid character %q after top-level 
value", c), +			Offset: enc.inputPos, +		} +		return  	}  	enc.inputPos += int64(size) | 
