summaryrefslogtreecommitdiff
path: root/reencode.go
diff options
context:
space:
mode:
Diffstat (limited to 'reencode.go')
-rw-r--r--reencode.go286
1 files changed, 176 insertions, 110 deletions
diff --git a/reencode.go b/reencode.go
index 876af62..eae80db 100644
--- a/reencode.go
+++ b/reencode.go
@@ -35,8 +35,8 @@ type ReEncoderConfig struct {
//
// Has no effect if Compact is true or Indent is empty.
//
- // This has O((CompactIfUnder+1)^2) memory overhead, so set
- // with caution.
+ // This has O(2^min(CompactIfUnder, depth)) time overhead, so
+ // set with caution.
CompactIfUnder int
// String to use to indent; ignored if Compact is true.
@@ -71,7 +71,8 @@ type ReEncoderConfig struct {
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
return &ReEncoder{
ReEncoderConfig: cfg,
- out: out,
+ out: internal.NewAllWriter(out),
+ specu: new(speculation),
}
}
@@ -82,12 +83,12 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
// This is useful for prettifying, minifying, sanitizing, and/or
// validating JSON.
//
-// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth).
+// The memory use of a ReEncoder is O(CompactIfUnder+depth).
type ReEncoder struct {
ReEncoderConfig
- out io.Writer
+ out internal.AllWriter
- // state: .Write's utf8-decoding buffer
+ // state: .Write's and .WriteString's utf8-decoding buffer
buf [utf8.UTFMax]byte
bufLen int
@@ -98,27 +99,47 @@ type ReEncoder struct {
inputPos int64
// state: .handleRune
- handleRuneState struct {
- lastNonSpace internal.RuneType
- wasNumber bool
- curIndent int
- uhex [4]byte // "\uABCD"-encoded characters in strings
- fracZeros int64
- expZero bool
-
- specu *speculation
- }
+ lastNonSpace internal.RuneType
+ wasNumber bool
+ curIndent int
+ uhex [4]byte // "\uABCD"-encoded characters in strings
+ fracZeros int64
+ expZero bool
+ specu *speculation
+
+ // state: .pushWriteBarrier and .popWriteBarrier
+ stackInputPos []int64
}
type speculation struct {
- compactFmt ReEncoder
- compactBuf bytes.Buffer
- indentFmt ReEncoder
- indentBuf bytes.Buffer
+ speculating bool
+ endWhenStackSize int
+ fmt ReEncoder
+ compact bytes.Buffer
+ buf []inputTuple
+}
+
+func (specu *speculation) Reset() {
+ specu.speculating = false
+ specu.endWhenStackSize = 0
+ specu.fmt = ReEncoder{}
+ specu.compact.Reset()
+ specu.buf = specu.buf[:0]
+}
+
+type inputTuple struct {
+ c rune
+ t internal.RuneType
+ stackSize int
}
// public API //////////////////////////////////////////////////////////////////
+var (
+ _ internal.AllWriter = (*ReEncoder)(nil)
+ _ io.Closer = (*ReEncoder)(nil)
+)
+
// Write implements io.Writer; it does what you'd expect.
//
// It is worth noting that Write returns the number of bytes consumed
@@ -152,6 +173,38 @@ func (enc *ReEncoder) Write(p []byte) (int, error) {
return len(p), nil
}
+// WriteString implements io.StringWriter; it does what you'd expect,
+// but see the notes on the Write method.
+func (enc *ReEncoder) WriteString(p string) (int, error) {
+ if len(p) == 0 {
+ return 0, nil
+ }
+ var n int
+ if enc.bufLen > 0 {
+ copy(enc.buf[enc.bufLen:], p)
+ c, size := utf8.DecodeRune(enc.buf[:])
+ n += size - enc.bufLen
+ enc.bufLen = 0
+ if _, err := enc.WriteRune(c); err != nil {
+ return 0, err
+ }
+ }
+ for utf8.FullRuneInString(p[n:]) {
+ c, size := utf8.DecodeRuneInString(p[n:])
+ if _, err := enc.WriteRune(c); err != nil {
+ return n, err
+ }
+ n += size
+ }
+ enc.bufLen = copy(enc.buf[:], p[n:])
+ return len(p), nil
+}
+
+// WriteByte implements io.ByteWriter; it does what you'd expect.
+func (enc *ReEncoder) WriteByte(b byte) error {
+ return internal.WriteByte(enc, b)
+}
+
// Close implements io.Closer; it does what you'd expect, mostly.
//
// The *ReEncoder may continue to be written to with new JSON values
@@ -170,14 +223,14 @@ func (enc *ReEncoder) Close() error {
}
return enc.err
}
- if err := enc.handleRune(0, internal.RuneTypeError); err != nil {
+ if err := enc.handleRune(0, internal.RuneTypeError, enc.par.StackSize()); err != nil {
enc.err = &ReEncodeSyntaxError{
Err: err,
Offset: enc.inputPos,
}
return enc.err
}
- if enc.AllowMultipleValues {
+ if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 {
enc.par.Reset()
}
return nil
@@ -212,9 +265,9 @@ rehandle:
}
return enc.written, enc.err
}
- enc.err = enc.handleRune(c, t)
+ enc.err = enc.handleRune(c, t, enc.par.StackSize())
if enc.err == nil && t == internal.RuneTypeEOF {
- if enc.AllowMultipleValues {
+ if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -230,32 +283,47 @@ rehandle:
return enc.written, enc.err
}
+// semi-public API /////////////////////////////////////////////////////////////
+
+func (enc *ReEncoder) pushWriteBarrier() {
+ enc.par.PushWriteBarrier()
+ enc.stackInputPos = append(enc.stackInputPos, enc.inputPos)
+ enc.inputPos = 0
+}
+
+func (enc *ReEncoder) popWriteBarrier() {
+ enc.par.PopBarrier()
+ enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1]
+ enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1]
+}
+
// internal ////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error {
+func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error {
if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" {
return enc.handleRuneNoSpeculation(c, t)
}
// main
- if enc.handleRuneState.specu == nil { // not speculating
+ if !enc.specu.speculating { // not speculating
switch t {
case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating
if err, _ := enc.handleRunePre(c, t); err != nil {
return err
}
- specu := &speculation{
- compactFmt: *enc,
- indentFmt: *enc,
- }
- specu.compactFmt.Compact = true
- specu.compactFmt.out = &specu.compactBuf
- specu.indentFmt.out = &specu.indentBuf
- enc.handleRuneState.specu = specu
- if err := specu.compactFmt.handleRuneMain(c, t); err != nil {
- return err
+ enc.specu.speculating = true
+ enc.specu.endWhenStackSize = stackSize - 1
+ enc.specu.fmt = ReEncoder{
+ ReEncoderConfig: enc.ReEncoderConfig,
+ out: &enc.specu.compact,
}
- if err := specu.indentFmt.handleRuneMain(c, t); err != nil {
+ enc.specu.fmt.Compact = true
+ enc.specu.buf = append(enc.specu.buf, inputTuple{
+ c: c,
+ t: t,
+ stackSize: stackSize,
+ })
+ if err := enc.specu.fmt.handleRuneMain(c, t); err != nil {
return err
}
default:
@@ -264,31 +332,33 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error {
}
}
} else { // speculating
-
- // canCompress is whether we're 1-up from the leaf;
- // set this *before* the calls to .handleRune.
- canCompress := enc.handleRuneState.specu.indentFmt.handleRuneState.specu == nil
-
- if err := enc.handleRuneState.specu.compactFmt.handleRune(c, t); err != nil {
+ enc.specu.buf = append(enc.specu.buf, inputTuple{
+ c: c,
+ t: t,
+ stackSize: stackSize,
+ })
+ if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil {
return err
}
- if err := enc.handleRuneState.specu.indentFmt.handleRune(c, t); err != nil {
- return err
- }
-
switch {
- case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent
- if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil {
+ case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent
+ buf := append([]inputTuple(nil), enc.specu.buf...)
+ enc.specu.Reset()
+ if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil {
return err
}
- enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState
- case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact
- if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil {
+ for _, tuple := range buf[1:] {
+ if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil {
+ return err
+ }
+ }
+ case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact
+ if _, err := enc.specu.compact.WriteTo(enc.out); err != nil {
return err
}
- enc.handleRuneState.lastNonSpace = t
- enc.handleRuneState.curIndent--
- enc.handleRuneState.specu = nil
+ enc.specu.Reset()
+ enc.lastNonSpace = t
+ enc.curIndent--
}
}
@@ -310,9 +380,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error
// the new rune itself is handled.
func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
// emit newlines between top-level values
- if enc.handleRuneState.lastNonSpace == internal.RuneTypeEOF {
+ if enc.lastNonSpace == internal.RuneTypeEOF {
switch {
- case enc.handleRuneState.wasNumber && t.IsNumber():
+ case enc.wasNumber && t.IsNumber():
if err := enc.emitByte('\n'); err != nil {
return err, false
}
@@ -326,35 +396,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
// shorten numbers
switch t { // trim trailing '0's from the fraction-part, but don't remove all digits
case internal.RuneTypeNumberFracDot:
- enc.handleRuneState.fracZeros = 0
+ enc.fracZeros = 0
case internal.RuneTypeNumberFracDig:
- if c == '0' && enc.handleRuneState.lastNonSpace == internal.RuneTypeNumberFracDig {
- enc.handleRuneState.fracZeros++
+ if c == '0' && enc.lastNonSpace == internal.RuneTypeNumberFracDig {
+ enc.fracZeros++
return nil, false
}
fallthrough
default:
- for enc.handleRuneState.fracZeros > 0 {
+ for enc.fracZeros > 0 {
if err := enc.emitByte('0'); err != nil {
return err, false
}
- enc.handleRuneState.fracZeros--
+ enc.fracZeros--
}
}
switch t { // trim leading '0's from the exponent-part, but don't remove all digits
case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign:
- enc.handleRuneState.expZero = true
+ enc.expZero = true
case internal.RuneTypeNumberExpDig:
- if c == '0' && enc.handleRuneState.expZero {
+ if c == '0' && enc.expZero {
return nil, false
}
- enc.handleRuneState.expZero = false
+ enc.expZero = false
default:
- if enc.handleRuneState.expZero {
+ if enc.expZero {
if err := enc.emitByte('0'); err != nil {
return err, false
}
- enc.handleRuneState.expZero = false
+ enc.expZero = false
}
}
@@ -370,8 +440,8 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
// let us manage whitespace, don't pass it through
return nil, false
case internal.RuneTypeObjectEnd, internal.RuneTypeArrayEnd:
- enc.handleRuneState.curIndent--
- switch enc.handleRuneState.lastNonSpace {
+ enc.curIndent--
+ switch enc.lastNonSpace {
case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg:
// collapse
default:
@@ -380,7 +450,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
}
}
default:
- switch enc.handleRuneState.lastNonSpace {
+ switch enc.lastNonSpace {
case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma:
if err := enc.emitNlIndent(); err != nil {
return err, false
@@ -392,7 +462,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
}
switch t {
case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg:
- enc.handleRuneState.curIndent++
+ enc.curIndent++
}
}
}
@@ -402,76 +472,72 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) {
// handleRuneMain handles the new rune itself, not buffered things.
func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error {
- defer func() {
- if t != internal.RuneTypeSpace {
- enc.handleRuneState.lastNonSpace = t
- }
- }()
-
+ var err error
switch t {
case internal.RuneTypeStringChar:
- return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape))
case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU:
- return nil
+ // do nothing
case internal.RuneTypeStringEsc1:
switch c {
case '"':
- return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape))
case '\\':
- return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape))
case '/':
- return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape))
case 'b':
- return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape))
case 'f':
- return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape))
case 'n':
- return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape))
case 'r':
- return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape))
case 't':
- return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape))
+ err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape))
default:
panic("should not happen")
}
case internal.RuneTypeStringEscUA:
- enc.handleRuneState.uhex[0], _ = internal.HexToInt(c)
- return nil
+ enc.uhex[0], _ = internal.HexToInt(c)
case internal.RuneTypeStringEscUB:
- enc.handleRuneState.uhex[1], _ = internal.HexToInt(c)
- return nil
+ enc.uhex[1], _ = internal.HexToInt(c)
case internal.RuneTypeStringEscUC:
- enc.handleRuneState.uhex[2], _ = internal.HexToInt(c)
- return nil
+ enc.uhex[2], _ = internal.HexToInt(c)
case internal.RuneTypeStringEscUD:
- enc.handleRuneState.uhex[3], _ = internal.HexToInt(c)
+ enc.uhex[3], _ = internal.HexToInt(c)
c := 0 |
- rune(enc.handleRuneState.uhex[0])<<12 |
- rune(enc.handleRuneState.uhex[1])<<8 |
- rune(enc.handleRuneState.uhex[2])<<4 |
- rune(enc.handleRuneState.uhex[3])<<0
- return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape))
+ rune(enc.uhex[0])<<12 |
+ rune(enc.uhex[1])<<8 |
+ rune(enc.uhex[2])<<4 |
+ rune(enc.uhex[3])<<0
+ err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape))
case internal.RuneTypeError: // EOF explicitly stated by .Close()
fallthrough
case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value
- enc.handleRuneState.wasNumber = enc.handleRuneState.lastNonSpace.IsNumber()
+ enc.wasNumber = enc.lastNonSpace.IsNumber()
switch {
- case enc.ForceTrailingNewlines:
- t = internal.RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one)
- return enc.emitByte('\n')
+ case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0:
+ t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
+ err = enc.emitByte('\n')
default:
- t = internal.RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed
- return nil
+ t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
}
default:
- return enc.emitByte(byte(c))
+ err = enc.emitByte(byte(c))
}
+
+ if t != internal.RuneTypeSpace {
+ enc.lastNonSpace = t
+ }
+ return err
}
func (enc *ReEncoder) emitByte(c byte) error {
- err := writeByte(enc.out, c)
+ err := enc.out.WriteByte(c)
if err == nil {
enc.written++
}
@@ -488,12 +554,12 @@ func (enc *ReEncoder) emitNlIndent() error {
return err
}
if enc.Prefix != "" {
- if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil {
+ if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil {
return err
}
}
- for i := 0; i < enc.handleRuneState.curIndent; i++ {
- if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil {
+ for i := 0; i < enc.curIndent; i++ {
+ if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil {
return err
}
}