summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-03 22:54:00 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-03 22:54:00 -0700
commit 4d460fc53ea054a2d5df89c6243f567ffb58b871 (patch)
tree 1034ed77256fd9c7c0a4740eca429ece9781c017
parent 69e4520942a27d7e8f8cdaf8f4611bf24fb73e7b (diff)
parent b3f4186f2b8e992f56f898784b1cd28bfd7550ca (diff)
Merge commit 'b3f4186f2b8e992f56f898784b1cd28bfd7550ca' into lukeshu/fix
-rw-r--r-- decode.go 56
-rw-r--r-- decode_scan.go 143
-rw-r--r-- decode_scan_test.go 263
-rw-r--r-- encode.go 26
-rw-r--r-- internal/parse.go 120
-rw-r--r-- reencode.go 23
6 files changed, 324 insertions, 307 deletions
diff --git a/decode.go b/decode.go
index 8fab267..60b530f 100644
--- a/decode.go
+++ b/decode.go
@@ -104,7 +104,7 @@ const maxNestingDepth = 10000
// an io.Reader.
func NewDecoder(r io.RuneScanner) *Decoder {
return &Decoder{
- io: &runeTypeScannerImpl{
+ io: runeTypeScanner{
inner: r,
parser: internal.Parser{
MaxDepth: maxNestingDepth,
@@ -245,6 +245,7 @@ func (dec *Decoder) Decode(ptr any) (err error) {
}
dec.io.Reset()
+ dec.io.PushReadBarrier()
defer func() {
if r := recover(); r != nil {
if de, ok := r.(decodeError); ok {
@@ -257,6 +258,7 @@ func (dec *Decoder) Decode(ptr any) (err error) {
}
}()
dec.decode(ptrVal.Elem(), false)
+ dec.io.PopReadBarrier()
return nil
}
@@ -319,12 +321,21 @@ func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Typ
}
}
-type decRuneTypeScanner struct {
+type decRuneScanner struct {
dec *Decoder
+ eof bool
}
-func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) {
+func (sc *decRuneScanner) ReadRune() (rune, int, error) {
+ if sc.eof {
+ return 0, 0, io.EOF
+ }
c, s, t, e := sc.dec.io.ReadRuneType()
+ if t == internal.RuneTypeEOF {
+ sc.eof = true
+ sc.dec.io.PopReadBarrier()
+ return 0, 0, io.EOF
+ }
if e != nil {
panic(decodeError{
Field: sc.dec.structStackStr(),
@@ -333,28 +344,17 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, erro
Err: e,
})
}
- return c, s, t, nil
+ return c, s, nil
}
-func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) {
- r, s, t, _ := sc.ReadRuneType()
- switch t {
- case internal.RuneTypeEOF:
- return 0, 0, io.EOF
- default:
- return r, s, nil
- }
+func (sc *decRuneScanner) UnreadRune() error {
+ return sc.dec.io.UnreadRune()
}
-func (sc *decRuneTypeScanner) UnreadRune() error { return sc.dec.io.UnreadRune() }
-func (sc *decRuneTypeScanner) InputOffset() int64 { return sc.dec.InputOffset() }
-func (sc *decRuneTypeScanner) Reset() { sc.dec.io.Reset() }
-
-func (dec *Decoder) limitingScanner() runeTypeScanner {
- return &elemRuneTypeScanner{
- inner: &decRuneTypeScanner{
- dec: dec,
- },
+func (dec *Decoder) limitingScanner() io.RuneScanner {
+ dec.io.PushReadBarrier()
+ return &decRuneScanner{
+ dec: dec,
}
}
@@ -867,7 +867,12 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er
}
}
}()
- dec := NewDecoder(r)
+ var dec *Decoder
+ if dr, ok := r.(*decRuneScanner); ok {
+ dec = dr.dec
+ } else {
+ dec = NewDecoder(r)
+ }
dec.posStackPush()
defer dec.posStackPop()
dec.decodeObject(nil,
@@ -947,7 +952,12 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e
}
}
}()
- dec := NewDecoder(r)
+ var dec *Decoder
+ if dr, ok := r.(*decRuneScanner); ok {
+ dec = dr.dec
+ } else {
+ dec = NewDecoder(r)
+ }
dec.posStackPush()
defer dec.posStackPop()
dec.decodeArray(nil, func() {
diff --git a/decode_scan.go b/decode_scan.go
index 261aaa6..e233caf 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -5,31 +5,12 @@
package lowmemjson
import (
- "errors"
"io"
"git.lukeshu.com/go/lowmemjson/internal"
)
-type runeTypeScanner interface {
- // The returned error is a *ReadError, a *SyntaxError, or nil.
- // An EOF condition is represented as one of:
- //
- // end of value but not file: (_, >0, RuneTypeEOF, nil)
- // end of both value and file: (_, 0, RuneTypeEOF, nil)
- // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})
- // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF})
- ReadRuneType() (rune, int, internal.RuneType, error)
- // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil.
- ReadRune() (rune, int, error)
- UnreadRune() error
- Reset()
- InputOffset() int64
-}
-
-// runeTypeScannerImpl /////////////////////////////////////////////////////////////////////////////
-
-type runeTypeScannerImpl struct {
+type runeTypeScanner struct {
// everything that is not "initialized by constructor" starts
// out as the zero value.
@@ -47,9 +28,7 @@ type runeTypeScannerImpl struct {
rErr error
}
-var _ runeTypeScanner = (*runeTypeScannerImpl)(nil)
-
-func (sc *runeTypeScannerImpl) Reset() {
+func (sc *runeTypeScanner) Reset() {
sc.parser.Reset()
if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) {
sc.repeat = false
@@ -69,7 +48,14 @@ func (sc *runeTypeScannerImpl) Reset() {
}
}
-func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) {
+// The returned error is a *DecodeReadError, a *DecodeSyntaxError, or nil.
+// An EOF condition is represented as one of:
+//
+// end of value but not file: (_, >0, RuneTypeEOF, nil)
+// end of both value and file: (_, 0, RuneTypeEOF, nil)
+// end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.ErrUnexpectedEOF})
+// end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.EOF})
+func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) {
switch {
case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF):
// do nothing
@@ -117,24 +103,12 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err
return sc.rRune, sc.rSize, sc.rType, sc.rErr
}
-func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) {
- r, s, t, e := sc.ReadRuneType()
- switch t {
- case internal.RuneTypeEOF:
- return 0, 0, io.EOF
- case internal.RuneTypeError:
- return 0, 0, e
- default:
- return r, s, nil
- }
-}
-
// UnreadRune undoes a call to .ReadRune() or .ReadRuneType().
//
// If the last call to .ReadRune() or .ReadRuneType() has already been
// unread, or if that call returned a rune with size 0, then
// ErrInvalidUnreadRune is returned. Otherwise, nil is returned.
-func (sc *runeTypeScannerImpl) UnreadRune() error {
+func (sc *runeTypeScanner) UnreadRune() error {
if sc.repeat || sc.rSize == 0 {
return ErrInvalidUnreadRune
}
@@ -143,7 +117,7 @@ func (sc *runeTypeScannerImpl) UnreadRune() error {
return nil
}
-func (sc *runeTypeScannerImpl) InputOffset() int64 {
+func (sc *runeTypeScanner) InputOffset() int64 {
ret := sc.offset
if sc.repeat {
ret -= int64(sc.rSize)
@@ -151,76 +125,37 @@ func (sc *runeTypeScannerImpl) InputOffset() int64 {
return ret
}
-// elemRuneTypeScanner /////////////////////////////////////////////////////////////////////////////
-
-type elemRuneTypeScanner struct {
- inner runeTypeScanner
-
- parser internal.Parser
- repeat bool
- stuck bool
- rType internal.RuneType
- rErr error
+func (sc *runeTypeScanner) PushReadBarrier() {
+ sc.parser.PushReadBarrier()
}
-var _ runeTypeScanner = (*elemRuneTypeScanner)(nil)
-
-func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) {
- // Read it, run it through the parent's parser.
- r, s, t, e := sc.inner.ReadRuneType()
-
- // Run it through our child parser.
- if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) {
- if sc.repeat || sc.stuck {
- sc.repeat = false
- } else {
- var err error
- if s > 0 {
- sc.rType, err = sc.parser.HandleRune(r)
- } else {
- sc.rType, err = sc.parser.HandleEOF()
- }
- if err != nil {
- sc.rErr = &DecodeSyntaxError{
- Offset: sc.inner.InputOffset(),
- Err: err,
- }
- } else {
- sc.rErr = nil
+func (sc *runeTypeScanner) PopReadBarrier() {
+ sc.parser.PopBarrier()
+ if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) {
+ // re-figure the rType and rErr
+ var err error
+ sc.rType, err = sc.parser.HandleRune(sc.rRune)
+ if err != nil {
+ sc.rErr = &DecodeSyntaxError{
+ Offset: sc.offset - int64(sc.rSize),
+ Err: err,
}
+ } else {
+ sc.rErr = nil
}
- sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError
- t, e = sc.rType, sc.rErr
- }
-
- // Check if we need to truncate the result.
- if t == internal.RuneTypeEOF {
- if s > 0 {
- _ = sc.inner.UnreadRune()
+ // tell it to use that rType and rErr
+ _ = sc.UnreadRune() // we set it up to always succeed
+ } else if sc.rType == internal.RuneTypeEOF {
+ // re-figure the rType and rErr
+ var err error
+ sc.rType, err = sc.parser.HandleEOF()
+ if err != nil {
+ sc.rErr = &DecodeSyntaxError{
+ Offset: sc.offset,
+ Err: err,
+ }
+ } else {
+ sc.rErr = nil
}
- return 0, 0, internal.RuneTypeEOF, nil
}
-
- return r, s, t, e
}
-
-func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) {
- r, s, t, e := sc.ReadRuneType()
- switch t {
- case internal.RuneTypeEOF:
- return 0, 0, io.EOF
- case internal.RuneTypeError:
- return 0, 0, e
- default:
- return r, s, nil
- }
-}
-
-func (sc *elemRuneTypeScanner) UnreadRune() error {
- ret := sc.inner.UnreadRune()
- sc.repeat = true
- return ret
-}
-
-func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() }
-func (sc *elemRuneTypeScanner) Reset() {}
diff --git a/decode_scan_test.go b/decode_scan_test.go
index 5bf5e2a..d0725e5 100644
--- a/decode_scan_test.go
+++ b/decode_scan_test.go
@@ -11,7 +11,6 @@ import (
"testing"
"github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/require"
"git.lukeshu.com/go/lowmemjson/internal"
)
@@ -23,8 +22,26 @@ type ReadRuneTypeResult struct {
e error
}
+const (
+ unreadRune = -1
+ pushReadBarrier = -2
+ popReadBarrier = -3
+ reset = -4
+)
+
func (r ReadRuneTypeResult) String() string {
- return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e)
+ switch r.s {
+ case unreadRune:
+ return fmt.Sprintf("{%q, unreadRune, %#v, %v}", r.r, r.t, r.e)
+ case pushReadBarrier:
+ return fmt.Sprintf("{%q, pushReadBarrier, %#v, %v}", r.r, r.t, r.e)
+ case popReadBarrier:
+ return fmt.Sprintf("{%q, popReadBarrier, %#v, %v}", r.r, r.t, r.e)
+ case reset:
+ return fmt.Sprintf("{%q, reset, %#v, %v}", r.r, r.t, r.e)
+ default:
+ return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e)
+ }
}
type runeTypeScannerTestcase struct {
@@ -33,31 +50,6 @@ type runeTypeScannerTestcase struct {
Exp []ReadRuneTypeResult
}
-func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneScanner) runeTypeScanner) {
- for tcName, tc := range testcases {
- tc := tc
- t.Run(tcName, func(t *testing.T) {
- t.Parallel()
- reader := strings.NewReader(tc.Input)
- sc := factory(reader)
- var exp, act []string
- for _, iExp := range tc.Exp {
- var iAct ReadRuneTypeResult
- if iExp.s < 0 {
- iAct.s = iExp.s
- iAct.e = sc.UnreadRune()
- } else {
- iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType()
- }
- exp = append(exp, iExp.String())
- act = append(act, iAct.String())
- }
- assert.Equal(t, exp, act)
- assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():])
- })
- }
-}
-
func TestRuneTypeScanner(t *testing.T) {
t.Parallel()
testcases := map[string]runeTypeScannerTestcase{
@@ -86,7 +78,7 @@ func TestRuneTypeScanner(t *testing.T) {
{'"', 1, internal.RuneTypeStringEnd, nil},
{':', 1, internal.RuneTypeObjectColon, nil},
{'1', 1, internal.RuneTypeNumberIntDig, nil},
- {0, -1, 0, nil},
+ {0, unreadRune, 0, nil},
{'1', 1, internal.RuneTypeNumberIntDig, nil},
{'2', 1, internal.RuneTypeNumberIntDig, nil},
{'.', 1, internal.RuneTypeNumberFracDot, nil},
@@ -104,8 +96,8 @@ func TestRuneTypeScanner(t *testing.T) {
{'"', 1, internal.RuneTypeStringEnd, nil},
{':', 1, internal.RuneTypeObjectColon, nil},
{'1', 1, internal.RuneTypeNumberIntDig, nil},
- {0, -1, 0, nil},
- {0, -1, 0, ErrInvalidUnreadRune},
+ {0, unreadRune, 0, nil},
+ {0, unreadRune, 0, ErrInvalidUnreadRune},
{'1', 1, internal.RuneTypeNumberIntDig, nil},
{'2', 1, internal.RuneTypeNumberIntDig, nil},
{'.', 1, internal.RuneTypeNumberFracDot, nil},
@@ -128,7 +120,7 @@ func TestRuneTypeScanner(t *testing.T) {
{'0', 1, internal.RuneTypeNumberFracDig, nil},
{'}', 1, internal.RuneTypeObjectEnd, nil},
{0, 0, internal.RuneTypeEOF, nil},
- {0, -1, 0, ErrInvalidUnreadRune},
+ {0, unreadRune, 0, ErrInvalidUnreadRune},
{0, 0, internal.RuneTypeEOF, nil},
{0, 0, internal.RuneTypeEOF, nil},
}},
@@ -174,150 +166,103 @@ func TestRuneTypeScanner(t *testing.T) {
{0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}},
{0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}},
}},
- }
- testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner {
- return &runeTypeScannerImpl{
- inner: reader,
- }
- })
-}
-
-func TestElemRuneTypeScanner(t *testing.T) {
- t.Parallel()
- toplevelTestcases := map[string]runeTypeScannerTestcase{
- "basic": {`1`, ``, []ReadRuneTypeResult{
+ "basic2": {`1`, ``, []ReadRuneTypeResult{
{'1', 1, internal.RuneTypeNumberIntDig, nil},
{0, 0, internal.RuneTypeEOF, nil},
{0, 0, internal.RuneTypeEOF, nil},
{0, 0, internal.RuneTypeEOF, nil},
}},
- "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{
- {'[', 1, internal.RuneTypeArrayBeg, nil},
- {'[', 1, internal.RuneTypeArrayBeg, nil},
- {'0', 1, internal.RuneTypeNumberIntZero, nil},
- {',', 1, internal.RuneTypeArrayComma, nil},
- {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
- {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
- {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
- }},
- "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{
+ "fragment": {`1,`, ``, []ReadRuneTypeResult{
{'1', 1, internal.RuneTypeNumberIntDig, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- {0, 0, internal.RuneTypeEOF, nil},
+ {',', 1, internal.RuneTypeEOF, nil},
+ {',', 1, internal.RuneTypeEOF, nil},
+ {',', 1, internal.RuneTypeEOF, nil},
}},
- "fragment": {`1,`, `,`, []ReadRuneTypeResult{
+ "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{
+ {'{', 1, internal.RuneTypeObjectBeg, nil},
+ {'"', 1, internal.RuneTypeStringBeg, nil},
+ {'f', 1, internal.RuneTypeStringChar, nil},
+ {'o', 1, internal.RuneTypeStringChar, nil},
+ {'o', 1, internal.RuneTypeStringChar, nil},
+ {'"', 1, internal.RuneTypeStringEnd, nil},
+ {':', 1, internal.RuneTypeObjectColon, nil},
+ {0, pushReadBarrier, 0, nil},
{'1', 1, internal.RuneTypeNumberIntDig, nil},
- {0, 0, internal.RuneTypeEOF, nil},
+ {'2', 1, internal.RuneTypeNumberIntDig, nil},
+ {'.', 1, internal.RuneTypeNumberFracDot, nil},
+ {'0', 1, internal.RuneTypeNumberFracDig, nil},
+ {'}', 1, internal.RuneTypeEOF, nil},
+ {'}', 1, internal.RuneTypeEOF, nil},
+ {0, popReadBarrier, 0, nil},
+ {'}', 1, internal.RuneTypeObjectEnd, nil},
{0, 0, internal.RuneTypeEOF, nil},
{0, 0, internal.RuneTypeEOF, nil},
}},
- "early-eof": {`{`, ``, []ReadRuneTypeResult{
- {'{', 1, internal.RuneTypeObjectBeg, nil},
- {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
- {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
- {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
- }},
}
-
- childTestcases := make(map[string]runeTypeScannerTestcase, len(toplevelTestcases))
- for tcName, tc := range toplevelTestcases {
- tc.Input = `[` + tc.Input
- tc.Exp = append([]ReadRuneTypeResult(nil), tc.Exp...) // copy
- for i, res := range tc.Exp {
- if se, ok := res.e.(*DecodeSyntaxError); ok {
- seCopy := *se
- seCopy.Offset++
- tc.Exp[i].e = &seCopy
+ func() {
+ childTestcases := make(map[string]runeTypeScannerTestcase)
+ for tcName, tc := range testcases {
+ canChild := true
+ for _, res := range tc.Exp {
+ if res.s == pushReadBarrier {
+ canChild = false
+ break
+ }
}
- }
- childTestcases[tcName] = tc
- }
-
- t.Run("top-level", func(t *testing.T) {
- t.Parallel()
- testRuneTypeScanner(t, toplevelTestcases, func(reader io.RuneScanner) runeTypeScanner {
- return &elemRuneTypeScanner{
- inner: &runeTypeScannerImpl{
- inner: reader,
- },
+ if !canChild {
+ continue
}
- })
- })
- t.Run("child", func(t *testing.T) {
- t.Parallel()
- testRuneTypeScanner(t, childTestcases, func(reader io.RuneScanner) runeTypeScanner {
- inner := &runeTypeScannerImpl{
- inner: reader,
+ tc.Input = `[1,` + tc.Input
+ tc.Exp = append([]ReadRuneTypeResult{
+ {'[', 1, internal.RuneTypeArrayBeg, nil},
+ {'1', 1, internal.RuneTypeNumberIntDig, nil},
+ {',', 1, internal.RuneTypeArrayComma, nil},
+ {0, pushReadBarrier, 0, nil},
+ }, tc.Exp...)
+ for i := 2; i < len(tc.Exp); i++ {
+ if se, ok := tc.Exp[i].e.(*DecodeSyntaxError); ok {
+ seCopy := *se
+ seCopy.Offset += 3
+ tc.Exp[i].e = &seCopy
+ }
}
- var res ReadRuneTypeResult
- res.r, res.s, res.t, res.e = inner.ReadRuneType()
- require.Equal(t,
- ReadRuneTypeResult{'[', 1, internal.RuneTypeArrayBeg, nil}.String(),
- res.String())
-
- return &elemRuneTypeScanner{
- inner: inner,
+ childTestcases["child-"+tcName] = tc
+ }
+ for tcName, tc := range childTestcases {
+ testcases[tcName] = tc
+ }
+ }()
+ for tcName, tc := range testcases {
+ tc := tc
+ t.Run(tcName, func(t *testing.T) {
+ t.Parallel()
+ t.Logf("input=%q", tc.Input)
+ reader := strings.NewReader(tc.Input)
+ sc := &runeTypeScanner{inner: reader}
+ var exp, act []string
+ for _, iExp := range tc.Exp {
+ var iAct ReadRuneTypeResult
+ switch iExp.s {
+ case unreadRune:
+ iAct.s = iExp.s
+ iAct.e = sc.UnreadRune()
+ case pushReadBarrier:
+ sc.PushReadBarrier()
+ iAct.s = iExp.s
+ case popReadBarrier:
+ sc.PopReadBarrier()
+ iAct.s = iExp.s
+ case reset:
+ sc.Reset()
+ iAct.s = iExp.s
+ default:
+ iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType()
+ }
+ exp = append(exp, iExp.String())
+ act = append(act, iAct.String())
}
+ assert.Equal(t, exp, act)
+ assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():])
})
- })
-}
-
-func TestElemRuneTypeScanner2(t *testing.T) {
- t.Parallel()
- parent := &runeTypeScannerImpl{
- inner: strings.NewReader(` { "foo" : 12.0 } `),
- }
- exp := []ReadRuneTypeResult{
- {'{', 1, internal.RuneTypeObjectBeg, nil},
- {'"', 1, internal.RuneTypeStringBeg, nil},
- {'f', 1, internal.RuneTypeStringChar, nil},
- {'o', 1, internal.RuneTypeStringChar, nil},
- {'o', 1, internal.RuneTypeStringChar, nil},
- {'"', 1, internal.RuneTypeStringEnd, nil},
- {':', 1, internal.RuneTypeObjectColon, nil},
- }
- expStr := make([]string, 0, len(exp))
- actStr := make([]string, 0, len(exp))
- for _, iExp := range exp {
- var iAct ReadRuneTypeResult
- iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType()
- expStr = append(expStr, iExp.String())
- actStr = append(actStr, iAct.String())
- require.Equal(t, expStr, actStr)
- }
-
- child := &elemRuneTypeScanner{
- inner: parent,
- }
- exp = []ReadRuneTypeResult{
- {'1', 1, internal.RuneTypeNumberIntDig, nil},
- {'2', 1, internal.RuneTypeNumberIntDig, nil},
- {'.', 1, internal.RuneTypeNumberFracDot, nil},
- {'0', 1, internal.RuneTypeNumberFracDig, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- }
- expStr, actStr = nil, nil
- for _, iExp := range exp {
- var iAct ReadRuneTypeResult
- iAct.r, iAct.s, iAct.t, iAct.e = child.ReadRuneType()
- expStr = append(expStr, iExp.String())
- actStr = append(actStr, iAct.String())
- require.Equal(t, expStr, actStr)
- }
-
- exp = []ReadRuneTypeResult{
- {'}', 1, internal.RuneTypeObjectEnd, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- {0, 0, internal.RuneTypeEOF, nil},
- }
- expStr, actStr = nil, nil
- for _, iExp := range exp {
- var iAct ReadRuneTypeResult
- iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType()
- expStr = append(expStr, iExp.String())
- actStr = append(actStr, iAct.String())
- require.Equal(t, expStr, actStr)
}
}
diff --git a/encode.go b/encode.go
index 5fb4fbf..ca4e060 100644
--- a/encode.go
+++ b/encode.go
@@ -9,17 +9,13 @@ import (
"encoding"
"encoding/base64"
"encoding/json"
- "errors"
"fmt"
"io"
- iofs "io/fs"
"reflect"
"sort"
"strconv"
"strings"
"unsafe"
-
- "git.lukeshu.com/go/lowmemjson/internal"
)
// Encodable is the interface implemented by types that can encode
@@ -98,7 +94,7 @@ var (
const startDetectingCyclesAfter = 1000
-func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error {
+func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error {
if !val.IsValid() {
return discardInt(w.WriteString("null"))
}
@@ -115,22 +111,22 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q
if !ok {
return discardInt(w.WriteString("null"))
}
- // Use a sub-ReEncoder to check that it's a full element.
- validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}}
- if err := obj.EncodeJSON(validator); err != nil {
+ w.pushWriteBarrier()
+ if err := obj.EncodeJSON(w); err != nil {
return &EncodeMethodError{
Type: val.Type(),
SourceFunc: "EncodeJSON",
Err: err,
}
}
- if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) {
+ if err := w.Close(); err != nil {
return &EncodeMethodError{
Type: val.Type(),
SourceFunc: "EncodeJSON",
Err: err,
}
}
+ w.popWriteBarrier()
case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType):
val = val.Addr()
@@ -151,22 +147,22 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q
Err: err,
}
}
- // Use a sub-ReEncoder to check that it's a full element.
- validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}}
- if _, err := validator.Write(dat); err != nil {
+ w.pushWriteBarrier()
+ if _, err := w.Write(dat); err != nil {
return &EncodeMethodError{
Type: val.Type(),
SourceFunc: "MarshalJSON",
Err: err,
}
}
- if err := validator.Close(); err != nil {
+ if err := w.Close(); err != nil {
return &EncodeMethodError{
Type: val.Type(),
SourceFunc: "MarshalJSON",
Err: err,
}
}
+ w.popWriteBarrier()
case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType):
val = val.Addr()
@@ -361,7 +357,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q
for i := 0; iter.Next(); i++ {
// TODO: Avoid buffering the map key
var k strings.Builder
- if err := encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil {
+ if err := encode(NewReEncoder(&k, ReEncoderConfig{BackslashEscape: escaper}), iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil {
return err
}
kStr := k.String()
@@ -496,7 +492,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q
return nil
}
-func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error {
+func encodeArray(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error {
if err := w.WriteByte('['); err != nil {
return err
}
diff --git a/internal/parse.go b/internal/parse.go
index 9db57fb..36db4a9 100644
--- a/internal/parse.go
+++ b/internal/parse.go
@@ -313,6 +313,13 @@ type Parser struct {
// a ["x","y"
// ["x","y"]
stack []RuneType
+
+ barriers []barrier
+}
+
+type barrier struct {
+ closed bool
+ stack []RuneType
}
func (par *Parser) init() {
@@ -345,8 +352,22 @@ func (par *Parser) stackString() string {
return buf.String()
}
+func (par *Parser) depth() int {
+ n := len(par.stack)
+ for _, barrier := range par.barriers {
+ n += len(barrier.stack)
+ }
+ return n
+}
+
func (par *Parser) StackIsEmpty() bool {
- return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny)
+ if len(par.barriers) > 0 {
+ return false
+ }
+ if len(par.stack) == 0 {
+ return true
+ }
+ return len(par.stack) == 1 && par.stack[0] == runeTypeAny
}
func (par *Parser) StackSize() int {
@@ -360,6 +381,99 @@ func (par *Parser) Reset() {
}
}
+// PushReadBarrier causes the parser to expect EOF once the end of the
+// element that is started by the current top-of-stack is reached,
+// until this is undone with PopBarrier. It essentially turns the
+// parser into a sub-parser.
+//
+// PushReadBarrier may only be called at the beginning of an element,
+// whether that be
+//
+// - runeTypeAny
+// - RuneTypeObjectBeg
+// - RuneTypeArrayBeg
+// - RuneTypeStringBeg
+// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig
+// - RuneTypeTrueT
+// - RuneTypeFalseF
+// - RuneTypeNullN
+func (par *Parser) PushReadBarrier() {
+ // Sanity checking.
+ par.init()
+ if len(par.stack) == 0 {
+ panic(errors.New("illegal PushReadBarrier call: empty stack"))
+ }
+ curState := par.stack[len(par.stack)-1]
+ switch curState {
+ case runeTypeAny,
+ RuneTypeObjectBeg,
+ RuneTypeArrayBeg,
+ RuneTypeStringBeg,
+ RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig,
+ RuneTypeTrueT,
+ RuneTypeFalseF,
+ RuneTypeNullN:
+ // OK
+ default:
+ panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState))
+ }
+ // Actually push.
+ par.barriers = append(par.barriers, barrier{
+ closed: par.closed,
+ stack: par.stack[:len(par.stack)-1],
+ })
+ par.stack = []RuneType{curState}
+}
+
+// PushWriteBarrier causes the parser to expect EOF once the end of
+// the about-to-start element is reached, until this is undone with
+// PopBarrier. It essentially turns the parser into a sub-parser.
+//
+// PushWriteBarrier may only be called at the places where an element
+// of any type may start:
+//
+// - runeTypeAny for top-level and object-value elements
+// - RuneTypeArrayBeg for array-item elements
+//
+// PushWriteBarrier signals intent to write an element; if it is
+// called in a place where an element is optional (at the beginning of
+// an array), it becomes a syntax error to not write the element.
+func (par *Parser) PushWriteBarrier() {
+ par.init()
+ if len(par.stack) == 0 {
+ panic(errors.New("illegal PushWriteBarrier call: empty stack"))
+ }
+ switch par.stack[len(par.stack)-1] {
+ case runeTypeAny:
+ par.popState()
+ par.barriers = append(par.barriers, barrier{
+ closed: par.closed,
+ stack: par.stack,
+ })
+ par.stack = []RuneType{runeTypeAny}
+ case RuneTypeArrayBeg:
+ par.replaceState(RuneTypeArrayComma)
+ par.barriers = append(par.barriers, barrier{
+ closed: par.closed,
+ stack: par.stack,
+ })
+ par.stack = []RuneType{runeTypeAny}
+ default:
+ panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1]))
+ }
+}
+
+// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier.
+func (par *Parser) PopBarrier() {
+ if len(par.barriers) == 0 {
+ panic(errors.New("illegal PopBarrier call: empty barrier stack"))
+ }
+ barrier := par.barriers[len(par.barriers)-1]
+ par.barriers = par.barriers[:len(par.barriers)-1]
+ par.closed = barrier.closed
+ par.stack = append(barrier.stack, par.stack...)
+}
+
// HandleEOF feeds EOF to the Parser. The returned RuneType is either
// RuneTypeEOF or RuneTypeError.
//
@@ -439,12 +553,12 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
case 0x0020, 0x000A, 0x000D, 0x0009:
return RuneTypeSpace, nil
case '{':
- if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth {
+ if par.MaxDepth > 0 && par.depth() > par.MaxDepth {
return RuneTypeError, ErrParserExceededMaxDepth
}
return par.replaceState(RuneTypeObjectBeg), nil
case '[':
- if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth {
+ if par.MaxDepth > 0 && par.depth() > par.MaxDepth {
return RuneTypeError, ErrParserExceededMaxDepth
}
return par.replaceState(RuneTypeArrayBeg), nil
diff --git a/reencode.go b/reencode.go
index a5dc3c8..eae80db 100644
--- a/reencode.go
+++ b/reencode.go
@@ -106,6 +106,9 @@ type ReEncoder struct {
fracZeros int64
expZero bool
specu *speculation
+
+ // state: .pushWriteBarrier and .popWriteBarrier
+ stackInputPos []int64
}
type speculation struct {
@@ -227,7 +230,7 @@ func (enc *ReEncoder) Close() error {
}
return enc.err
}
- if enc.AllowMultipleValues {
+ if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 {
enc.par.Reset()
}
return nil
@@ -264,7 +267,7 @@ rehandle:
}
enc.err = enc.handleRune(c, t, enc.par.StackSize())
if enc.err == nil && t == internal.RuneTypeEOF {
- if enc.AllowMultipleValues {
+ if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -280,6 +283,20 @@ rehandle:
return enc.written, enc.err
}
+// semi-public API /////////////////////////////////////////////////////////////
+
+func (enc *ReEncoder) pushWriteBarrier() {
+ enc.par.PushWriteBarrier()
+ enc.stackInputPos = append(enc.stackInputPos, enc.inputPos)
+ enc.inputPos = 0
+}
+
+func (enc *ReEncoder) popWriteBarrier() {
+ enc.par.PopBarrier()
+ enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1]
+ enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1]
+}
+
// internal ////////////////////////////////////////////////////////////////////
func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error {
@@ -503,7 +520,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error {
case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value
enc.wasNumber = enc.lastNonSpace.IsNumber()
switch {
- case enc.ForceTrailingNewlines:
+ case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0:
t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
err = enc.emitByte('\n')
default: