summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@datawire.io> 2022-08-16 22:40:19 -0600
committer Luke Shumaker <lukeshu@datawire.io> 2022-08-17 00:12:35 -0600
commit e57bee02e02b7e3697d6c3cb8b75923a92100427 (patch)
tree f32610f2b73fbea1f2a94e108fabca18d31d5d27
parent 87b02577e50b76d373e3c6b921d776e39cb83346 (diff)
Add tests for decode reading too far
-rw-r--r-- decode.go 5
-rw-r--r-- decode_scan.go 62
-rw-r--r-- decode_scan_test.go 128
-rw-r--r-- decode_test.go 21
-rw-r--r-- errors.go 1
5 files changed, 185 insertions, 32 deletions
diff --git a/decode.go b/decode.go
index e42c115..a17a572 100644
--- a/decode.go
+++ b/decode.go
@@ -114,7 +114,10 @@ func (dec *Decoder) stackName() string {
return strings.Join(fields, ".")
}
-func Decode(r io.Reader, ptr any) error {
+func Decode(r interface {
+ io.Reader
+ io.RuneScanner // enforce that the reader have .UnreadRune() so that we don't risk reading too far when decoding a number
+}, ptr any) error {
return NewDecoder(r).Decode(ptr)
}
diff --git a/decode_scan.go b/decode_scan.go
index fee9ec6..9fa6181 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -11,13 +11,11 @@ import (
type runeTypeScanner interface {
// The returned error is a *ReadError, a *SyntaxError, or nil.
- // An EOF condition is represented either as
+ // An EOF condition is represented as one of:
//
- // (char, size, RuneTypeEOF, nil)
- //
- // or
- //
- // (char, size, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})
+ // end of value but not file: (_, >0, RuneTypeEOF, nil)
+ // end of both value and file: (_, 0, RuneTypeEOF, nil)
+ // end of file but not value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.ErrUnexpectedEOF})
ReadRuneType() (rune, int, RuneType, error)
// The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil.
ReadRune() (rune, int, error)
@@ -31,6 +29,8 @@ type runeTypeScanner interface {
type runeTypeScannerImpl struct {
inner io.RuneReader
+ initialized bool
+
parser Parser
offset int64
@@ -109,6 +109,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) {
}
}
}
+ sc.initialized = true
sc.repeat = false
sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError
return sc.rRune, sc.rSize, sc.rType, sc.rErr
@@ -128,12 +129,13 @@ func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) {
var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune")
-// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). If the
-// last call to .ReadRune() or .ReadRuneType() has already been
-// unread, or if that call returned an error or RuneTypeEOF, then
-// ErrInvalidRune is returned. Otherwise, nil is returned.
+// UnreadRune undoes a call to .ReadRune() or .ReadRuneType().
+//
+// If the last call to .ReadRune() or .ReadRuneType() has already been
+// unread, or if that call returned a rune with size 0, then
+// ErrInvalidUnreadRune is returned. Otherwise, nil is returned.
func (sc *runeTypeScannerImpl) UnreadRune() error {
- if sc.stuck || sc.repeat {
+ if !sc.initialized || sc.repeat || sc.rSize == 0 {
return ErrInvalidUnreadRune
}
sc.repeat = true
@@ -191,27 +193,46 @@ type elemRuneTypeScanner struct {
parser Parser
repeat bool
+ stuck bool
rType RuneType
+ rErr error
}
var _ runeTypeScanner = (*elemRuneTypeScanner)(nil)
func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) {
+ // Read it, run it through the parent's parser.
r, s, t, e := sc.inner.ReadRuneType()
- // Check if we need to insert a premature EOF
- if t != RuneTypeError && t != RuneTypeEOF {
- if sc.repeat {
+ // Run it through our child parser.
+ if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) {
+ if sc.repeat || sc.stuck {
sc.repeat = false
} else {
- sc.rType, _ = sc.parser.HandleRune(r)
- }
- if sc.rType == RuneTypeEOF {
- _ = sc.inner.UnreadRune()
+ var err error
+ if s > 0 {
+ sc.rType, err = sc.parser.HandleRune(r)
+ } else {
+ sc.rType, err = sc.parser.HandleEOF()
+ }
+ if err != nil {
+ sc.rErr = &DecodeSyntaxError{
+ Offset: sc.inner.InputOffset(),
+ Err: err,
+ }
+ } else {
+ sc.rErr = nil
+ }
}
- t = sc.rType
+ sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError
+ t, e = sc.rType, sc.rErr
}
+
+ // Check if we need to truncate the result.
if t == RuneTypeEOF {
+ if s > 0 {
+ _ = sc.inner.UnreadRune()
+ }
return 0, 0, RuneTypeEOF, nil
}
@@ -231,8 +252,9 @@ func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) {
}
func (sc *elemRuneTypeScanner) UnreadRune() error {
+ ret := sc.inner.UnreadRune()
sc.repeat = true
- return sc.inner.UnreadRune()
+ return ret
}
func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() }
diff --git a/decode_scan_test.go b/decode_scan_test.go
index 8bc33e3..6fd9369 100644
--- a/decode_scan_test.go
+++ b/decode_scan_test.go
@@ -26,14 +26,16 @@ func (r ReadRuneTypeResult) String() string {
}
type runeTypeScannerTestcase struct {
- Input string
- Exp []ReadRuneTypeResult
+ Input string
+ ExpRemainder string
+ Exp []ReadRuneTypeResult
}
func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneReader) runeTypeScanner) {
for tcName, tc := range testcases {
t.Run(tcName, func(t *testing.T) {
- sc := factory(strings.NewReader(tc.Input))
+ reader := strings.NewReader(tc.Input)
+ sc := factory(reader)
var exp, act []string
for _, iExp := range tc.Exp {
var iAct ReadRuneTypeResult
@@ -47,13 +49,14 @@ func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestc
act = append(act, iAct.String())
}
assert.Equal(t, exp, act)
+ assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():])
})
}
}
func TestRuneTypeScanner(t *testing.T) {
testcases := map[string]runeTypeScannerTestcase{
- "basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -70,7 +73,7 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -89,7 +92,7 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "unread2": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "unread2": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -109,7 +112,7 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "unread-eof": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -128,12 +131,26 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "syntax-error": {`[[0,]`, []ReadRuneTypeResult{
+ "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{
{'[', 1, RuneTypeArrayBeg, nil},
{'[', 1, RuneTypeArrayBeg, nil},
{'0', 1, RuneTypeNumberIntZero, nil},
{',', 1, RuneTypeArrayComma, nil},
{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ }},
+ "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{
+ {'1', 1, RuneTypeNumberIntDig, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ }},
+ "early-eof": {`{`, ``, []ReadRuneTypeResult{
+ {'{', 1, RuneTypeObjectBeg, nil},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
}},
}
testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
@@ -145,7 +162,7 @@ func TestRuneTypeScanner(t *testing.T) {
func TestNoWSRuneTypeScanner(t *testing.T) {
testcases := map[string]runeTypeScannerTestcase{
- "basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -161,7 +178,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+ "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -179,7 +196,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
- "tail": {`{"foo": 12.0} `, []ReadRuneTypeResult{
+ "tail": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{
{'{', 1, RuneTypeObjectBeg, nil},
{'"', 1, RuneTypeStringBeg, nil},
{'f', 1, RuneTypeStringChar, nil},
@@ -195,6 +212,18 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
{0, 0, RuneTypeEOF, nil},
{0, 0, RuneTypeEOF, nil},
}},
+ "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{
+ {'1', 1, RuneTypeNumberIntDig, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ {'{', 1, RuneTypeEOF, nil},
+ }},
+ "early-eof": {` {`, ``, []ReadRuneTypeResult{
+ {'{', 1, RuneTypeObjectBeg, nil},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+ }},
}
testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
return &noWSRuneTypeScanner{
@@ -206,6 +235,83 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
}
func TestElemRuneTypeScanner(t *testing.T) {
+ testcases := map[string]runeTypeScannerTestcase{
+ "basic": {`1`, ``, []ReadRuneTypeResult{
+ {'1', 1, RuneTypeNumberIntDig, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ }},
+ "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{
+ {'[', 1, RuneTypeArrayBeg, nil},
+ {'[', 1, RuneTypeArrayBeg, nil},
+ {'0', 1, RuneTypeNumberIntZero, nil},
+ {',', 1, RuneTypeArrayComma, nil},
+ {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+ }},
+ "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{
+ {'1', 1, RuneTypeNumberIntDig, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ }},
+ "fragment": {`1,`, `,`, []ReadRuneTypeResult{
+ {'1', 1, RuneTypeNumberIntDig, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ {0, 0, RuneTypeEOF, nil},
+ }},
+ "early-eof": {`{`, ``, []ReadRuneTypeResult{
+ {'{', 1, RuneTypeObjectBeg, nil},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+ {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+ }},
+ }
+ t.Run("top-level", func(t *testing.T) {
+ testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
+ return &elemRuneTypeScanner{
+ inner: &noWSRuneTypeScanner{
+ inner: &runeTypeScannerImpl{
+ inner: reader,
+ },
+ },
+ }
+ })
+ })
+
+ for tcName, tc := range testcases {
+ tc.Input = `[` + tc.Input
+ for _, res := range tc.Exp {
+ if se, ok := res.e.(*DecodeSyntaxError); ok {
+ se.Offset++
+ }
+ }
+ testcases[tcName] = tc
+ }
+ t.Run("child", func(t *testing.T) {
+ testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
+ inner := &noWSRuneTypeScanner{
+ inner: &runeTypeScannerImpl{
+ inner: reader,
+ },
+ }
+ var res ReadRuneTypeResult
+ res.r, res.s, res.t, res.e = inner.ReadRuneType()
+ require.Equal(t,
+ ReadRuneTypeResult{'[', 1, RuneTypeArrayBeg, nil}.String(),
+ res.String())
+
+ return &elemRuneTypeScanner{
+ inner: inner,
+ }
+ })
+ })
+}
+
+func TestElemRuneTypeScanner2(t *testing.T) {
parent := &noWSRuneTypeScanner{
inner: &runeTypeScannerImpl{
inner: strings.NewReader(` { "foo" : 12.0 } `),
diff --git a/decode_test.go b/decode_test.go
new file mode 100644
index 0000000..8220e39
--- /dev/null
+++ b/decode_test.go
@@ -0,0 +1,21 @@
+// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestDecodeNumber(t *testing.T) {
+ r := strings.NewReader(`1{}`)
+
+ var num int
+ assert.NoError(t, Decode(r, &num))
+ assert.Equal(t, 1, num)
+ assert.Equal(t, 2, r.Len()) // check that it didn't read too far
+}
diff --git a/errors.go b/errors.go
index e71d79a..3978d62 100644
--- a/errors.go
+++ b/errors.go
@@ -48,6 +48,7 @@ type DecodeSyntaxError struct {
func (e *DecodeSyntaxError) Error() string {
return fmt.Sprintf("json: syntax error at input byte %v: %v", e.Offset, e.Err)
}
+func (e *DecodeSyntaxError) Unwrap() error { return e.Err }
// A *DecodeTypeError is returned from Decode if the JSON input is not
// appropriate for the given Go type.