From ab0d686b9bb43a02f8d74c5e881782ab4e94e30b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 12:53:05 -0700 Subject: jsonparse: Have PushReadBarrier reject trailing whitespace --- decode_scan_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++ internal/jsonparse/parse.go | 41 +++++++++++++++++++-------------- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/decode_scan_test.go b/decode_scan_test.go index 17c40d5..ee532c2 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -122,6 +122,32 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, + "child-tail-ws": {`[1,` + `{"foo": 12.0} `, ` `, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, {'"', 1, jsonparse.RuneTypeStringBeg, nil}, {'f', 1, jsonparse.RuneTypeStringChar, nil}, @@ -213,6 +239,36 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {0, pushReadBarrier, 0, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, + "child-elem": {`[1,` + ` { "foo" : 12.0 } `, ` `, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, {'"', 1, jsonparse.RuneTypeStringBeg, nil}, {'f', 1, jsonparse.RuneTypeStringChar, nil}, diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 06efc8c..1c35533 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -324,7 +324,8 @@ type Parser struct { } type barrier struct { - stack []RuneType + allowWS bool + stack []RuneType } func (par *Parser) init() { @@ -387,9 +388,10 @@ func (par *Parser) Reset() { } // PushReadBarrier causes the parser to emit EOF once the end of the -// element that is started by the current top-of-stack is reached, -// until this is un-done with PopBarrier. It essentially turns the -// parser in to a sub-parser. +// element that is started by the current top-of-stack is reached +// (which means that it will reject whitespace between the end of the +// element and EOF), until this is un-done with PopBarrier. It +// essentially turns the parser in to a sub-parser. // // PushReadBarrier may only be called at the beginning of an element, // whether that be @@ -424,14 +426,16 @@ func (par *Parser) PushReadBarrier() { } // Actually push. par.barriers = append(par.barriers, barrier{ - stack: par.stack[:len(par.stack)-1], + allowWS: false, + stack: par.stack[:len(par.stack)-1], }) par.stack = []RuneType{curState} } // PushWriteBarrier causes the parser to emit EOF once the end of the -// about-to-start element is reached, until this is un-done with -// PopBarrier. It essentially turns the parser in to a sub-parser. +// about-to-start element is reached and any trailing whitespace has +// been exhausted, until this is un-done with PopBarrier. It +// essentially turns the parser in to a sub-parser. // // PushWriteBarrier may only be called at the places where an element // of any type may start: @@ -451,13 +455,15 @@ func (par *Parser) PushWriteBarrier() { case runeTypeAny: par.popState() par.barriers = append(par.barriers, barrier{ - stack: par.stack, + allowWS: true, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} case RuneTypeArrayBeg: par.replaceState(RuneTypeArrayComma) par.barriers = append(par.barriers, barrier{ - stack: par.stack, + allowWS: true, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} default: @@ -541,16 +547,17 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { } par.init() if len(par.stack) == 0 { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - if len(par.barriers) > 0 { - return RuneTypeEOF, nil - } else { - return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil } } + if len(par.barriers) > 0 { + return RuneTypeEOF, nil + } else { + return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + } } switch par.stack[len(par.stack)-1] { // any ///////////////////////////////////////////////////////////////////////////////////// -- cgit v1.2.3-2-g168b