summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 22:56:37 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 22:56:37 -0700
commita0113140d447e59ce02d131499861aeafb02d328 (patch)
tree3a61b0c070a5db186e2c49fe70dff6f40431124e
parent6f8e7db1ac5ddd21b8e3fcc39a1e30fde9b62c3a (diff)
parentd19e2c6884c2d409fcc828c870f1839ee84f38cb (diff)
Merge branch 'lukeshu/reencode-refactor'
-rw-r--r--encode.go2
-rw-r--r--reencode.go335
-rw-r--r--reencode_compactnum.go67
-rw-r--r--reencode_compactws.go27
-rw-r--r--reencode_compactwsifunder.go106
-rw-r--r--reencode_indent.go102
-rw-r--r--reencode_nl.go39
-rw-r--r--reencode_string.go33
-rw-r--r--reencode_test.go15
-rw-r--r--reencode_write.go52
10 files changed, 514 insertions, 264 deletions
diff --git a/encode.go b/encode.go
index 38a2e93..00d3dad 100644
--- a/encode.go
+++ b/encode.go
@@ -83,7 +83,7 @@ func (enc *Encoder) Encode(obj any) (err error) {
if enc.isRoot {
enc.w.par.Reset()
}
- escaper := enc.w.cfg.BackslashEscape
+ escaper := enc.w.esc
if escaper == nil {
escaper = EscapeDefault
}
diff --git a/reencode.go b/reencode.go
index d8cdb71..7e9b5ff 100644
--- a/reencode.go
+++ b/reencode.go
@@ -5,14 +5,12 @@
package lowmemjson
import (
- "bytes"
"fmt"
"io"
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
- "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// A ReEncoderConfig controls how a ReEncoder should behave.
@@ -71,10 +69,57 @@ type ReEncoderConfig struct {
// calls are syscalls, then you may want to wrap Out in a
// bufio.Writer.
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
+ var module reEncoderModule
+
+ // Basic
+ module = &reEncodeWrite{
+ out: fastio.NewAllWriter(out),
+ }
+
+ // Whitespace
+ if cfg.ForceTrailingNewlines {
+ module = &reEncodeForceNL{
+ out: module,
+ }
+ }
+ switch {
+ case cfg.Compact:
+ module = &reEncodeCompactWS{
+ out: module,
+ }
+ case cfg.Indent != "":
+ if cfg.CompactIfUnder > 0 {
+ module = &reEncodeCompactWSIfUnder{
+ out: module,
+ CompactWSIfUnder: cfg.CompactIfUnder,
+ }
+ }
+ module = &reEncodeIndent{
+ out: module,
+ Indent: cfg.Indent,
+ Prefix: cfg.Prefix,
+ }
+ }
+
+ // Numbers
+ module = &reEncodeCompactNum{
+ out: module,
+ }
+
+ // Strings
+ escaper := cfg.BackslashEscape
+ if escaper == nil {
+ escaper = EscapeDefault
+ }
+ module = &reEncodeString{
+ out: module,
+ BackslashEscape: escaper,
+ }
+
return &ReEncoder{
- cfg: cfg,
- out: fastio.NewAllWriter(out),
- specu: new(speculation),
+ out: module,
+ esc: escaper,
+ allowMultipleValues: cfg.AllowMultipleValues,
}
}
@@ -87,8 +132,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
//
// The memory use of a ReEncoder is O(CompactIfUnder+depth).
type ReEncoder struct {
- cfg ReEncoderConfig
- out fastio.AllWriter
+ out reEncoderModule
+ esc BackslashEscaper
+ allowMultipleValues bool
// state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer
buf [utf8.UTFMax]byte
@@ -99,18 +145,11 @@ type ReEncoder struct {
par jsonparse.Parser
inputPos int64
- // state: .handleRune
- lastNonSpace jsonparse.RuneType
- lastNonSpaceNonEOF jsonparse.RuneType
- wasNumber bool
- curIndent int
- uhex [3]byte // "\uABCD"-encoded characters in strings
- fracZeros int64
- expZero bool
- specu *speculation
-
// state: .pushWriteBarrier and .popWriteBarrier
barriers []barrier
+
+ // state: .handleRuneType
+ uhex [3]byte // "\uABCD"-encoded characters in strings
}
type barrier struct {
@@ -118,26 +157,9 @@ type barrier struct {
stackSize int
}
-type speculation struct {
- speculating bool
- endWhenStackSize int
- fmt ReEncoder
- compact bytes.Buffer
- buf []inputTuple
-}
-
-func (specu *speculation) Reset() {
- specu.speculating = false
- specu.endWhenStackSize = 0
- specu.fmt = ReEncoder{}
- specu.compact.Reset()
- specu.buf = specu.buf[:0]
-}
-
-type inputTuple struct {
- c rune
- t jsonparse.RuneType
- stackSize int
+type reEncoderModule interface {
+ HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error
+ PopWriteBarrier()
}
// public API //////////////////////////////////////////////////////////////////
@@ -239,14 +261,14 @@ func (enc *ReEncoder) Close() error {
return enc.err
}
if len(enc.barriers) == 0 {
- if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(0, jsonparse.RuneTypeEOF, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
return enc.err
}
- if enc.cfg.AllowMultipleValues {
+ if enc.allowMultipleValues {
enc.par.Reset()
}
}
@@ -271,7 +293,7 @@ rehandle:
return
}
if t == jsonparse.RuneTypeEOF {
- if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 {
+ if enc.allowMultipleValues && len(enc.barriers) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -301,7 +323,7 @@ func (enc *ReEncoder) popWriteBarrier() {
enc.par.PopBarrier()
enc.inputPos += enc.barriers[len(enc.barriers)-1].inputPos
enc.barriers = enc.barriers[:len(enc.barriers)-1]
- enc.lastNonSpace = enc.lastNonSpaceNonEOF
+ enc.out.PopWriteBarrier()
}
// internal ////////////////////////////////////////////////////////////////////
@@ -315,189 +337,9 @@ func (enc *ReEncoder) stackSize() int {
}
func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error {
- if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" {
- return enc.handleRuneNoSpeculation(c, t)
- }
-
- // main
- if !enc.specu.speculating { // not speculating
- switch t {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating
- if err, _ := enc.handleRunePre(c, t); err != nil {
- return err
- }
- enc.specu.speculating = true
- enc.specu.endWhenStackSize = stackSize - 1
- enc.specu.fmt = ReEncoder{
- cfg: enc.cfg,
- out: &enc.specu.compact,
- }
- enc.specu.fmt.cfg.Compact = true
- enc.specu.buf = append(enc.specu.buf, inputTuple{
- c: c,
- t: t,
- stackSize: stackSize,
- })
- if err := enc.specu.fmt.handleRuneMain(c, t); err != nil {
- return err
- }
- default:
- if err := enc.handleRuneNoSpeculation(c, t); err != nil {
- return err
- }
- }
- } else { // speculating
- enc.specu.buf = append(enc.specu.buf, inputTuple{
- c: c,
- t: t,
- stackSize: stackSize,
- })
- if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil {
- return err
- }
- switch {
- case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent
- buf := append([]inputTuple(nil), enc.specu.buf...)
- enc.specu.Reset()
- if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil {
- return err
- }
- for _, tuple := range buf[1:] {
- if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil {
- return err
- }
- }
- case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact
- if _, err := enc.specu.compact.WriteTo(enc.out); err != nil {
- return err
- }
- enc.specu.Reset()
- enc.lastNonSpace = t
- enc.curIndent--
- }
- }
-
- return nil
-}
-
-func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t jsonparse.RuneType) error {
- err, shouldHandle := enc.handleRunePre(c, t)
- if err != nil {
- return err
- }
- if !shouldHandle {
- return nil
- }
- return enc.handleRuneMain(c, t)
-}
-
-// handleRunePre handles buffered things that need to happen before
-// the new rune itself is handled.
-func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) {
- // emit newlines between top-level values
- if enc.lastNonSpace == jsonparse.RuneTypeEOF {
- switch {
- case enc.wasNumber && t.IsNumber():
- if err := enc.out.WriteByte('\n'); err != nil {
- return err, false
- }
- case enc.cfg.Indent != "" && !enc.cfg.Compact:
- if err := enc.out.WriteByte('\n'); err != nil {
- return err, false
- }
- }
- }
-
- // shorten numbers
- switch t { // trim trailing '0's from the fraction-part, but don't remove all digits
- case jsonparse.RuneTypeNumberFracDot:
- enc.fracZeros = 0
- case jsonparse.RuneTypeNumberFracDig:
- if c == '0' && enc.lastNonSpace == jsonparse.RuneTypeNumberFracDig {
- enc.fracZeros++
- return nil, false
- }
- fallthrough
- default:
- for enc.fracZeros > 0 {
- if err := enc.out.WriteByte('0'); err != nil {
- return err, false
- }
- enc.fracZeros--
- }
- }
- switch t { // trim leading '0's from the exponent-part, but don't remove all digits
- case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign:
- enc.expZero = true
- case jsonparse.RuneTypeNumberExpDig:
- if c == '0' && enc.expZero {
- return nil, false
- }
- enc.expZero = false
- default:
- if enc.expZero {
- if err := enc.out.WriteByte('0'); err != nil {
- return err, false
- }
- enc.expZero = false
- }
- }
-
- // whitespace
- switch {
- case enc.cfg.Compact:
- if t == jsonparse.RuneTypeSpace {
- return nil, false
- }
- case enc.cfg.Indent != "":
- switch t {
- case jsonparse.RuneTypeSpace:
- // let us manage whitespace, don't pass it through
- return nil, false
- case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd:
- enc.curIndent--
- switch enc.lastNonSpace {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
- // collapse
- default:
- if err := enc.emitNlIndent(); err != nil {
- return err, false
- }
- }
- default:
- switch enc.lastNonSpace {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma:
- if err := enc.emitNlIndent(); err != nil {
- return err, false
- }
- case jsonparse.RuneTypeObjectColon:
- if err := enc.out.WriteByte(' '); err != nil {
- return err, false
- }
- }
- switch t {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
- enc.curIndent++
- }
- }
- }
-
- return nil, true
-}
-
-// handleRuneMain handles the new rune itself, not buffered things.
-func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
- escaper := enc.cfg.BackslashEscape
- if escaper == nil {
- escaper = EscapeDefault
- }
- var err error
switch t {
-
- case jsonparse.RuneTypeStringChar:
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))
case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU:
- // do nothing
+ return nil
case jsonparse.RuneTypeStringEsc1:
switch c {
case '"', '\\', '/':
@@ -515,54 +357,25 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
default:
panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c))
}
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))
+ return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeShort, stackSize)
case jsonparse.RuneTypeStringEscUA:
enc.uhex[0] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUB:
enc.uhex[1] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUC:
enc.uhex[2] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUD:
c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))
-
- case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()
- fallthrough
- case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value
- enc.wasNumber = enc.lastNonSpace.IsNumber()
- switch {
- case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0:
- t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
- err = enc.out.WriteByte('\n')
- default:
- t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
- }
+ return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeUnicode, stackSize)
+ case jsonparse.RuneTypeError:
+ panic(fmt.Errorf("should not happen: handleRune called with %#v", t))
default:
- err = enc.out.WriteByte(byte(c))
- }
-
- if t != jsonparse.RuneTypeSpace {
- enc.lastNonSpace = t
- if t != jsonparse.RuneTypeEOF {
- enc.lastNonSpaceNonEOF = t
- }
- }
- return err
-}
-
-func (enc *ReEncoder) emitNlIndent() error {
- if err := enc.out.WriteByte('\n'); err != nil {
- return err
- }
- if enc.cfg.Prefix != "" {
- if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil {
- return err
+ if t > jsonparse.RuneTypeEOF {
+ panic(fmt.Errorf("should not happen: handleRune called with %#v", t))
}
+ return enc.out.HandleRune(c, t, BackslashEscapeNone, stackSize)
}
- for i := 0; i < enc.curIndent; i++ {
- if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil {
- return err
- }
- }
- return nil
}
diff --git a/reencode_compactnum.go b/reencode_compactnum.go
new file mode 100644
index 0000000..5da2c54
--- /dev/null
+++ b/reencode_compactnum.go
@@ -0,0 +1,67 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeCompactNum struct {
+ out reEncoderModule
+
+ // state
+ fracFirst bool
+ fracZeros int64
+ expZero bool
+}
+
+var _ reEncoderModule = (*reEncodeCompactNum)(nil)
+
+func (enc *reEncodeCompactNum) PopWriteBarrier() {
+ enc.out.PopWriteBarrier()
+}
+
+// HandleRune implements reEncoderModule.  It forwards each rune to
+// enc.out, shortening numbers along the way:
+//
+//   - In the fraction-part, '0' digits after the first digit are held
+//     back (counted in enc.fracZeros) rather than forwarded.  They are
+//     re-emitted if a non-zero fraction digit follows them (they were
+//     interior zeros), and dropped if the fraction-part ends (they
+//     were trailing zeros).  The first fraction digit is always
+//     forwarded, so the fraction-part never loses all of its digits.
+//   - In the exponent-part, leading '0' digits are skipped; if that
+//     skips every digit, a single '0' is emitted in their place.
+//
+// Any error returned by enc.out is returned unmodified.
+func (enc *reEncodeCompactNum) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+	// trim trailing '0's from the fraction-part, but don't remove all digits
+	switch t {
+	case jsonparse.RuneTypeNumberFracDot:
+		enc.fracFirst = true
+		enc.fracZeros = 0
+	case jsonparse.RuneTypeNumberFracDig:
+		if c == '0' && !enc.fracFirst {
+			// Might be a trailing zero; hold it until we know.
+			enc.fracZeros++
+			return nil
+		}
+		// Another digit follows the held zeros, so they were
+		// interior zeros after all; emit them before this digit.
+		for ; enc.fracZeros > 0; enc.fracZeros-- {
+			if err := enc.out.HandleRune('0', jsonparse.RuneTypeNumberFracDig, escape, stackSize); err != nil {
+				return err
+			}
+		}
+		enc.fracFirst = false
+	default:
+		// The fraction-part (if there was one) is over, so any
+		// zeros still being held were trailing zeros; drop them.
+		// (Flushing them here, as a shared `default:` arm would,
+		// re-emits exactly the digits that were meant to be trimmed.)
+		enc.fracZeros = 0
+		enc.fracFirst = false
+	}
+
+	// trim leading '0's from the exponent-part, but don't remove all digits
+	switch t {
+	case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign:
+		enc.expZero = true
+	case jsonparse.RuneTypeNumberExpDig:
+		if c == '0' && enc.expZero {
+			// Leading zero; skip it.
+			return nil
+		}
+		enc.expZero = false
+	default:
+		if enc.expZero {
+			// Every exponent digit was a skipped '0'; emit one
+			// '0' so that the exponent still has a digit.
+			if err := enc.out.HandleRune('0', jsonparse.RuneTypeNumberExpDig, escape, stackSize); err != nil {
+				return err
+			}
+			enc.expZero = false
+		}
+	}
+
+	return enc.out.HandleRune(c, t, escape, stackSize)
+}
diff --git a/reencode_compactws.go b/reencode_compactws.go
new file mode 100644
index 0000000..396cf6d
--- /dev/null
+++ b/reencode_compactws.go
@@ -0,0 +1,27 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeCompactWS struct {
+ out reEncoderModule
+}
+
+var _ reEncoderModule = (*reEncodeCompactWS)(nil)
+
+func (enc *reEncodeCompactWS) PopWriteBarrier() {
+ enc.out.PopWriteBarrier()
+}
+
+func (enc *reEncodeCompactWS) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+ if t == jsonparse.RuneTypeSpace {
+ return nil
+ }
+
+ return enc.out.HandleRune(c, t, escape, stackSize)
+}
diff --git a/reencode_compactwsifunder.go b/reencode_compactwsifunder.go
new file mode 100644
index 0000000..2349104
--- /dev/null
+++ b/reencode_compactwsifunder.go
@@ -0,0 +1,106 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "bytes"
+
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeCompactWSIfUnder struct {
+ out reEncoderModule
+
+ // CompactWSIfUnder causes whitespace to be compacted (as
+ // reEncodeCompactWS does) for individual elements if doing so
+ // would cause that element to be under this number of bytes.
+ //
+ // This has O(2^min(CompactWSIfUnder, depth)) time overhead,
+ // so set with caution.
+ CompactWSIfUnder int
+
+ // state
+ compactor reEncodeWrite
+ compacted bytes.Buffer
+ full []handleRuneCall
+ endWhenStackSize int
+}
+
+var _ reEncoderModule = (*reEncodeCompactWSIfUnder)(nil)
+
+type handleRuneCall struct {
+ c rune
+ t jsonparse.RuneType
+ escape BackslashEscapeMode
+ stackSize int
+}
+
+func (enc *reEncodeCompactWSIfUnder) reset() {
+ enc.compactor = reEncodeWrite{}
+ enc.compacted.Reset()
+ enc.full = enc.full[:0]
+ enc.endWhenStackSize = 0
+}
+
+func (enc *reEncodeCompactWSIfUnder) PopWriteBarrier() {
+ enc.out.PopWriteBarrier()
+}
+
+func (enc *reEncodeCompactWSIfUnder) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+ if enc.compactor.out == nil { // not speculating
+ switch t {
+ case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating
+ enc.endWhenStackSize = stackSize - 1
+ enc.compactor = reEncodeWrite{
+ out: &enc.compacted,
+ }
+ enc.full = append(enc.full, handleRuneCall{
+ c: c,
+ t: t,
+ escape: escape,
+ stackSize: stackSize,
+ })
+ return enc.compactor.HandleRune(c, t, escape, stackSize)
+ default:
+ return enc.out.HandleRune(c, t, escape, stackSize)
+ }
+ } else { // speculating
+ enc.full = append(enc.full, handleRuneCall{
+ c: c,
+ t: t,
+ escape: escape,
+ stackSize: stackSize,
+ })
+ if t != jsonparse.RuneTypeSpace {
+ if err := enc.compactor.HandleRune(c, t, escape, stackSize); err != nil {
+ return err
+ }
+ }
+ switch {
+ case enc.compacted.Len() >= enc.CompactWSIfUnder: // stop speculating; use indent
+ buf := append([]handleRuneCall(nil), enc.full...)
+ enc.reset()
+ if err := enc.out.HandleRune(buf[0].c, buf[0].t, buf[0].escape, buf[0].stackSize); err != nil {
+ return err
+ }
+ for _, tuple := range buf[1:] {
+ if err := enc.HandleRune(tuple.c, tuple.t, tuple.escape, tuple.stackSize); err != nil {
+ return err
+ }
+ }
+ case stackSize == enc.endWhenStackSize: // stop speculating; use compact
+ for _, tuple := range enc.full {
+ if tuple.t == jsonparse.RuneTypeSpace {
+ continue
+ }
+ if err := enc.out.HandleRune(tuple.c, tuple.t, tuple.escape, tuple.stackSize); err != nil {
+ return err
+ }
+ }
+ enc.reset()
+ }
+ return nil
+ }
+}
diff --git a/reencode_indent.go b/reencode_indent.go
new file mode 100644
index 0000000..90b35db
--- /dev/null
+++ b/reencode_indent.go
@@ -0,0 +1,102 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeIndent struct {
+ out reEncoderModule
+
+ // String to use to indent.
+ //
+ // Newlines are emitted *between* top-level values; a newline
+ // is not emitted after the *last* top-level value.
+ Indent string
+
+ // String to put before indents.
+ Prefix string
+
+ // state
+ lastNonSpace jsonparse.RuneType
+ lastNonSpaceNonEOF jsonparse.RuneType
+ curIndent int
+}
+
+var _ reEncoderModule = (*reEncodeIndent)(nil)
+
+func (enc *reEncodeIndent) PopWriteBarrier() {
+ enc.lastNonSpace = enc.lastNonSpaceNonEOF
+ enc.out.PopWriteBarrier()
+}
+
+func (enc *reEncodeIndent) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+ // emit newlines between top-level values
+ if enc.lastNonSpace == jsonparse.RuneTypeEOF && t != jsonparse.RuneTypeSpace {
+ if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, 0); err != nil {
+ return err
+ }
+ }
+
+ // indent
+ switch t {
+ case jsonparse.RuneTypeSpace:
+ // let us manage whitespace, don't pass it through
+ return nil
+ case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd:
+ enc.curIndent--
+ switch enc.lastNonSpace {
+ case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
+ // collapse
+ default:
+ if err := enc.emitNlIndent(stackSize + 1); err != nil {
+ return err
+ }
+ }
+ default:
+ switch enc.lastNonSpace {
+ case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma:
+ if err := enc.emitNlIndent(stackSize); err != nil {
+ return err
+ }
+ case jsonparse.RuneTypeObjectColon:
+ if err := enc.out.HandleRune(' ', jsonparse.RuneTypeSpace, 0, stackSize); err != nil {
+ return err
+ }
+ }
+ switch t {
+ case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
+ enc.curIndent++
+ }
+ }
+
+ if t != jsonparse.RuneTypeSpace {
+ enc.lastNonSpace = t
+ if t != jsonparse.RuneTypeEOF {
+ enc.lastNonSpaceNonEOF = t
+ }
+ }
+ return enc.out.HandleRune(c, t, escape, stackSize)
+}
+
+func (enc *reEncodeIndent) emitNlIndent(stackSize int) error {
+ if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, stackSize); err != nil {
+ return err
+ }
+ for _, c := range enc.Prefix {
+ if err := enc.out.HandleRune(c, jsonparse.RuneTypeSpace, 0, stackSize); err != nil {
+ return err
+ }
+ }
+ for i := 0; i < enc.curIndent; i++ {
+ for _, c := range enc.Indent {
+ if err := enc.out.HandleRune(c, jsonparse.RuneTypeSpace, 0, stackSize); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
diff --git a/reencode_nl.go b/reencode_nl.go
new file mode 100644
index 0000000..b7a3cd9
--- /dev/null
+++ b/reencode_nl.go
@@ -0,0 +1,39 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeForceNL struct {
+ out reEncoderModule
+
+ // state
+ skipNL bool
+}
+
+var _ reEncoderModule = (*reEncodeForceNL)(nil)
+
+func (enc *reEncodeForceNL) PopWriteBarrier() {
+ enc.out.PopWriteBarrier()
+}
+
+func (enc *reEncodeForceNL) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+ switch {
+ case t == jsonparse.RuneTypeEOF:
+ if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, stackSize); err != nil {
+ return err
+ }
+ enc.skipNL = true
+ return enc.out.HandleRune(c, t, escape, stackSize)
+ case c == '\n' && t == jsonparse.RuneTypeSpace && enc.skipNL:
+ enc.skipNL = false
+ return nil
+ default:
+ enc.skipNL = false
+ return enc.out.HandleRune(c, t, escape, stackSize)
+ }
+}
diff --git a/reencode_string.go b/reencode_string.go
new file mode 100644
index 0000000..ab148d6
--- /dev/null
+++ b/reencode_string.go
@@ -0,0 +1,33 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+type reEncodeString struct {
+ out reEncoderModule
+
+ // BackslashEscape returns whether a given character in a
+ // string should be backslash-escaped. The bool argument is
+ // whether it was \u-escaped in the input. This does not
+ // affect characters that must or must-not be escaped to be
+ // valid JSON.
+ BackslashEscape BackslashEscaper
+}
+
+var _ reEncoderModule = (*reEncodeString)(nil)
+
+func (enc *reEncodeString) PopWriteBarrier() {
+ enc.out.PopWriteBarrier()
+}
+
+func (enc *reEncodeString) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error {
+ if t == jsonparse.RuneTypeStringChar {
+ escape = enc.BackslashEscape(c, escape)
+ }
+ return enc.out.HandleRune(c, t, escape, stackSize)
+}
diff --git a/reencode_test.go b/reencode_test.go
index 38f3f8f..83660ef 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -66,12 +66,13 @@ func TestReEncode(t *testing.T) {
"arrays2": {
enc: ReEncoderConfig{
Indent: "\t",
- CompactIfUnder: 10,
+ CompactIfUnder: 15,
ForceTrailingNewlines: true,
},
in: []any{
map[string]any{
"a": 1,
+ "b": 2,
},
map[string]any{
"generation": 123456,
@@ -81,7 +82,7 @@ func TestReEncode(t *testing.T) {
},
},
exp: `[
- {"a":1},
+ {"a":1,"b":2},
{
"generation": 123456
},
@@ -120,6 +121,16 @@ func TestReEncode(t *testing.T) {
]
`,
},
+ "indent-unicode": {
+ enc: ReEncoderConfig{
+ Prefix: "—",
+ Indent: "»",
+ },
+ in: []int{9},
+ exp: `[
+—»9
+—]`,
+ },
}
for tcName, tc := range testcases {
tc := tc
diff --git a/reencode_write.go b/reencode_write.go
new file mode 100644
index 0000000..dffbb21
--- /dev/null
+++ b/reencode_write.go
@@ -0,0 +1,52 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "git.lukeshu.com/go/lowmemjson/internal/fastio"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
+)
+
+type reEncodeWrite struct {
+ out fastio.AllWriter
+
+ // state
+ last jsonparse.RuneType
+ lastNonEOF jsonparse.RuneType
+ wasNumber bool
+}
+
+var _ reEncoderModule = (*reEncodeWrite)(nil)
+
+func (enc *reEncodeWrite) PopWriteBarrier() {
+ enc.last = enc.lastNonEOF
+}
+
+func (enc *reEncodeWrite) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, _ int) error {
+ // emit newlines between top-level values, if necessary
+ if enc.last == jsonparse.RuneTypeEOF && enc.wasNumber && t.IsNumber() {
+ if _, err := enc.out.WriteRune('\n'); err != nil {
+ return err
+ }
+ }
+ if t == jsonparse.RuneTypeEOF {
+ enc.wasNumber = enc.last.IsNumber()
+ } else {
+ enc.lastNonEOF = t
+ }
+ enc.last = t
+
+ // emit the rune
+ switch t {
+ case jsonparse.RuneTypeEOF:
+ return nil
+ case jsonparse.RuneTypeStringChar:
+ return jsonstring.WriteStringChar(enc.out, c, escape)
+ default:
+ _, err := enc.out.WriteRune(c)
+ return err
+ }
+}