summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-18 22:45:44 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-18 22:45:44 -0700
commit d240d0b06c7b5711f583d961eddfc37d07d4546e (patch)
tree 51580f5e3323e3e5bc060f9fa21da2028610330e
parent cf75ff06887d7bbb5bbbd682587b9d3f5e474670 (diff)
parent ab0d686b9bb43a02f8d74c5e881782ab4e94e30b (diff)
Merge branch 'lukeshu/scan-behavior'
-rw-r--r-- decode.go 18
-rw-r--r-- decode_scan.go 71
-rw-r--r-- decode_scan_test.go 103
-rw-r--r-- internal/jsonparse/parse.go 37
-rw-r--r-- reencode.go 17
5 files changed, 131 insertions, 115 deletions
diff --git a/decode.go b/decode.go
index 8514ec4..491971a 100644
--- a/decode.go
+++ b/decode.go
@@ -207,19 +207,17 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) {
if err := dec.Decode(ptr); err != nil {
return err
}
- c, s, t, _ := dec.io.ReadRuneType()
- if t != jsonparse.RuneTypeEOF {
- panic(fmt.Errorf("should not happen: .ReadRuneType returned non-EOF after decode without .Reset being called: %v", t))
- }
- if s > 0 {
+ _, _, t, err := dec.io.ReadRuneType()
+ switch t {
+ case jsonparse.RuneTypeError:
return &DecodeError{
- Err: &DecodeSyntaxError{
- Err: fmt.Errorf("invalid character %q after top-level value", c),
- Offset: dec.InputOffset(),
- },
+ Err: err,
}
+ case jsonparse.RuneTypeEOF:
+ return nil
+ default:
+ panic(fmt.Errorf("should not happen: .ReadRuneType returned non-error non-EOF after decode without .Reset being called: '%v'", t))
}
- return nil
}
// Decode reads the next JSON element from the Decoder's input stream
diff --git a/decode_scan.go b/decode_scan.go
index 7911c01..fcf47ff 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -31,8 +31,7 @@ type runeTypeScanner struct {
// The returned error is a *ReadError, a *SyntaxError, or nil.
// An EOF condition is represented as one of:
//
-// end of value but not file: (_, >0, RuneTypeEOF, nil)
-// end of both value and file: (_, 0, RuneTypeEOF, nil)
+// end of value: (_, 0, RuneTypeEOF, nil)
// end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})
// end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF})
func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) {
@@ -59,8 +58,14 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
} else {
sc.rErr = nil
}
- if sc.rType == jsonparse.RuneTypeSpace {
+ switch sc.rType {
+ case jsonparse.RuneTypeSpace:
goto again
+ case jsonparse.RuneTypeEOF:
+ sc.offset -= int64(sc.rSize)
+ sc.rRune = 0
+ sc.rSize = 0
+ _ = sc.inner.UnreadRune()
}
case io.EOF:
sc.rType, err = sc.parser.HandleEOF()
@@ -122,65 +127,13 @@ func (sc *runeTypeScanner) PopReadBarrier() {
} else {
sc.rErr = nil
}
- case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0:
- // re-figure the rType and rErr
- var err error
- sc.rType, err = sc.parser.HandleRune(sc.rRune)
- if err != nil {
- sc.rErr = &DecodeSyntaxError{
- Offset: sc.offset - int64(sc.rSize),
- Err: err,
- }
- } else {
- sc.rErr = nil
- }
- // tell it to use that rType and rErr
- _ = sc.UnreadRune() // we set it up to always succeed
- case sc.rType == jsonparse.RuneTypeEOF:
- // re-figure the rType and rErr
- var err error
- sc.rType, err = sc.parser.HandleEOF()
- if err != nil {
- sc.rErr = &DecodeSyntaxError{
- Offset: sc.offset,
- Err: err,
- }
- } else {
- sc.rErr = nil
- }
+ case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF:
+ sc.rTypeOK = false // forget the sticky EOF
}
}
func (sc *runeTypeScanner) Reset() {
sc.parser.Reset()
- switch {
- case sc.repeat:
- // re-figure the rType and rErr
- var err error
- sc.rType, err = sc.parser.HandleRune(sc.rRune)
- if err != nil {
- sc.rErr = &DecodeSyntaxError{
- Offset: sc.offset - int64(sc.rSize),
- Err: err,
- }
- } else {
- sc.rErr = nil
- }
- case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0:
- // re-figure the rType and rErr
- var err error
- sc.rType, err = sc.parser.HandleRune(sc.rRune)
- if err != nil {
- sc.rErr = &DecodeSyntaxError{
- Offset: sc.offset - int64(sc.rSize),
- Err: err,
- }
- } else {
- sc.rErr = nil
- }
- // tell it to use that rType and rErr
- _ = sc.UnreadRune() // we set it up to always succeed
- default:
- sc.rTypeOK = false
- }
+ sc.rTypeOK = false // forget any sticky errors/EOF
+ sc.repeat = false // feed the rune (if any) through the parser again
}
diff --git a/decode_scan_test.go b/decode_scan_test.go
index eaf2f37..ee532c2 100644
--- a/decode_scan_test.go
+++ b/decode_scan_test.go
@@ -112,8 +112,8 @@ func TestRuneTypeScanner(t *testing.T) {
{',', 1, jsonparse.RuneTypeArrayComma, nil},
{0, pushReadBarrier, 0, nil},
{'2', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {']', 1, jsonparse.RuneTypeEOF, nil},
- {0, unreadRune, 0, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, unreadRune, 0, ErrInvalidUnreadRune},
{0, popReadBarrier, 0, nil},
{']', 1, jsonparse.RuneTypeArrayEnd, nil},
{0, 0, jsonparse.RuneTypeEOF, nil},
@@ -122,6 +122,32 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, jsonparse.RuneTypeEOF, nil},
}},
"tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{
+ // Disable auto-child.
+ {0, pushReadBarrier, 0, nil},
+ {0, popReadBarrier, 0, nil},
+ // Test main.
+ {'{', 1, jsonparse.RuneTypeObjectBeg, nil},
+ {'"', 1, jsonparse.RuneTypeStringBeg, nil},
+ {'f', 1, jsonparse.RuneTypeStringChar, nil},
+ {'o', 1, jsonparse.RuneTypeStringChar, nil},
+ {'o', 1, jsonparse.RuneTypeStringChar, nil},
+ {'"', 1, jsonparse.RuneTypeStringEnd, nil},
+ {':', 1, jsonparse.RuneTypeObjectColon, nil},
+ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {'2', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {'.', 1, jsonparse.RuneTypeNumberFracDot, nil},
+ {'0', 1, jsonparse.RuneTypeNumberFracDig, nil},
+ {'}', 1, jsonparse.RuneTypeObjectEnd, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ }},
+ "child-tail-ws": {`[1,` + `{"foo": 12.0} `, ` `, []ReadRuneTypeResult{
+ // Child prefix.
+ {'[', 1, jsonparse.RuneTypeArrayBeg, nil},
+ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {',', 1, jsonparse.RuneTypeArrayComma, nil},
+ {0, pushReadBarrier, 0, nil},
+ // Test main.
{'{', 1, jsonparse.RuneTypeObjectBeg, nil},
{'"', 1, jsonparse.RuneTypeStringBeg, nil},
{'f', 1, jsonparse.RuneTypeStringChar, nil},
@@ -149,17 +175,17 @@ func TestRuneTypeScanner(t *testing.T) {
"multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{
{0, pushReadBarrier, 0, nil},
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
{0, popReadBarrier, 0, nil},
}},
"multi-value2": {`1{}`, ``, []ReadRuneTypeResult{
{0, pushReadBarrier, 0, nil},
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
- {'{', 1, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
{0, popReadBarrier, 0, nil},
{0, reset, 0, nil},
{0, pushReadBarrier, 0, nil},
@@ -196,11 +222,11 @@ func TestRuneTypeScanner(t *testing.T) {
{0, popReadBarrier, 0, nil},
// Test main.
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
+ {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}},
+ {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}},
+ {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}},
}},
- "child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{
+ "child-fragment": {`[1,` + `1,`, `,`, []ReadRuneTypeResult{
// Child prefix.
{'[', 1, jsonparse.RuneTypeArrayBeg, nil},
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
@@ -208,11 +234,15 @@ func TestRuneTypeScanner(t *testing.T) {
{0, pushReadBarrier, 0, nil},
// Test main.
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
- {',', 1, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
}},
"elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{
+ // Disable auto-child.
+ {0, pushReadBarrier, 0, nil},
+ {0, popReadBarrier, 0, nil},
+ // Test main.
{'{', 1, jsonparse.RuneTypeObjectBeg, nil},
{'"', 1, jsonparse.RuneTypeStringBeg, nil},
{'f', 1, jsonparse.RuneTypeStringChar, nil},
@@ -225,22 +255,49 @@ func TestRuneTypeScanner(t *testing.T) {
{'2', 1, jsonparse.RuneTypeNumberIntDig, nil},
{'.', 1, jsonparse.RuneTypeNumberFracDot, nil},
{'0', 1, jsonparse.RuneTypeNumberFracDig, nil},
- {'}', 1, jsonparse.RuneTypeEOF, nil},
- {'}', 1, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
{0, popReadBarrier, 0, nil},
{'}', 1, jsonparse.RuneTypeObjectEnd, nil},
{0, 0, jsonparse.RuneTypeEOF, nil},
{0, 0, jsonparse.RuneTypeEOF, nil},
}},
- "invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{
+ "child-elem": {`[1,` + ` { "foo" : 12.0 } `, ` `, []ReadRuneTypeResult{
+ // Child prefix.
+ {'[', 1, jsonparse.RuneTypeArrayBeg, nil},
+ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {',', 1, jsonparse.RuneTypeArrayComma, nil},
+ {0, pushReadBarrier, 0, nil},
+ // Test main.
+ {'{', 1, jsonparse.RuneTypeObjectBeg, nil},
+ {'"', 1, jsonparse.RuneTypeStringBeg, nil},
+ {'f', 1, jsonparse.RuneTypeStringChar, nil},
+ {'o', 1, jsonparse.RuneTypeStringChar, nil},
+ {'o', 1, jsonparse.RuneTypeStringChar, nil},
+ {'"', 1, jsonparse.RuneTypeStringEnd, nil},
+ {':', 1, jsonparse.RuneTypeObjectColon, nil},
+ {0, pushReadBarrier, 0, nil},
+ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {'2', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {'.', 1, jsonparse.RuneTypeNumberFracDot, nil},
+ {'0', 1, jsonparse.RuneTypeNumberFracDig, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, popReadBarrier, 0, nil},
+ {'}', 1, jsonparse.RuneTypeObjectEnd, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
+ }},
+ "invalid-number": {`1.2.3`, ``, []ReadRuneTypeResult{
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil},
{'.', 1, jsonparse.RuneTypeNumberFracDot, nil},
{'2', 1, jsonparse.RuneTypeNumberFracDig, nil},
- {'.', 1, jsonparse.RuneTypeEOF, nil},
+ {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q after top-level value", '.')}},
{0, reset, 0, nil},
- {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}},
+ {'3', 1, jsonparse.RuneTypeNumberIntDig, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
}},
- "trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{
+ "trailing-garbage": {" 42 x", ``, []ReadRuneTypeResult{
{0, pushReadBarrier, 0, nil},
{'4', 1, jsonparse.RuneTypeNumberIntDig, nil},
{0, unreadRune, 0, nil},
@@ -249,10 +306,10 @@ func TestRuneTypeScanner(t *testing.T) {
{0, pushReadBarrier, 0, nil},
{'4', 1, jsonparse.RuneTypeNumberIntDig, nil},
{'2', 1, jsonparse.RuneTypeNumberIntDig, nil},
- {'x', 1, jsonparse.RuneTypeEOF, nil},
+ {0, 0, jsonparse.RuneTypeEOF, nil},
{0, popReadBarrier, 0, nil},
{0, popReadBarrier, 0, nil},
- {'x', 1, jsonparse.RuneTypeEOF, nil},
+ {'x', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q after top-level value", 'x')}},
}},
"unread-reset": {`{}`, ``, []ReadRuneTypeResult{
{'{', 1, jsonparse.RuneTypeObjectBeg, nil},
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index d867cbc..1c35533 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -324,7 +324,8 @@ type Parser struct {
}
type barrier struct {
- stack []RuneType
+ allowWS bool
+ stack []RuneType
}
func (par *Parser) init() {
@@ -387,9 +388,10 @@ func (par *Parser) Reset() {
}
// PushReadBarrier causes the parser to emit EOF once the end of the
-// element that is started by the current top-of-stack is reached,
-// until this is un-done with PopBarrier. It essentially turns the
-// parser in to a sub-parser.
+// element that is started by the current top-of-stack is reached
+// (which means that it will reject whitespace between the end of the
+// element and EOF), until this is un-done with PopBarrier. It
+// essentially turns the parser in to a sub-parser.
//
// PushReadBarrier may only be called at the beginning of an element,
// whether that be
@@ -424,14 +426,16 @@ func (par *Parser) PushReadBarrier() {
}
// Actually push.
par.barriers = append(par.barriers, barrier{
- stack: par.stack[:len(par.stack)-1],
+ allowWS: false,
+ stack: par.stack[:len(par.stack)-1],
})
par.stack = []RuneType{curState}
}
// PushWriteBarrier causes the parser to emit EOF once the end of the
-// about-to-start element is reached, until this is un-done with
-// PopBarrier. It essentially turns the parser in to a sub-parser.
+// about-to-start element is reached and any trailing whitespace has
+// been exhausted, until this is un-done with PopBarrier. It
+// essentially turns the parser in to a sub-parser.
//
// PushWriteBarrier may only be called at the places where an element
// of any type may start:
@@ -451,13 +455,15 @@ func (par *Parser) PushWriteBarrier() {
case runeTypeAny:
par.popState()
par.barriers = append(par.barriers, barrier{
- stack: par.stack,
+ allowWS: true,
+ stack: par.stack,
})
par.stack = []RuneType{runeTypeAny}
case RuneTypeArrayBeg:
par.replaceState(RuneTypeArrayComma)
par.barriers = append(par.barriers, barrier{
- stack: par.stack,
+ allowWS: true,
+ stack: par.stack,
})
par.stack = []RuneType{runeTypeAny}
default:
@@ -541,11 +547,16 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
}
par.init()
if len(par.stack) == 0 {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- default:
+ if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS {
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ }
+ }
+ if len(par.barriers) > 0 {
return RuneTypeEOF, nil
+ } else {
+ return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c)
}
}
switch par.stack[len(par.stack)-1] {
diff --git a/reencode.go b/reencode.go
index 7e9b5ff..0745c43 100644
--- a/reencode.go
+++ b/reencode.go
@@ -276,7 +276,6 @@ func (enc *ReEncoder) Close() error {
}
func (enc *ReEncoder) handleRune(c rune, size int) {
-rehandle:
t, err := enc.par.HandleRune(c)
if err != nil {
enc.err = &ReEncodeSyntaxError{
@@ -293,16 +292,14 @@ rehandle:
return
}
if t == jsonparse.RuneTypeEOF {
- if enc.allowMultipleValues && len(enc.barriers) == 0 {
- enc.par.Reset()
- goto rehandle
- } else {
- enc.err = &ReEncodeSyntaxError{
- Err: fmt.Errorf("invalid character %q after top-level value", c),
- Offset: enc.inputPos,
- }
- return
+ if len(enc.barriers) == 0 {
+ panic(fmt.Errorf("should not happen: EOF for rune %q without write barriers", c))
}
+ enc.err = &ReEncodeSyntaxError{
+ Err: fmt.Errorf("invalid character %q after top-level value", c),
+ Offset: enc.inputPos,
+ }
+ return
}
enc.inputPos += int64(size)