From 2828fa21c0ffd2a32a108b37c0417b01abc42929 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 21:02:56 -0700 Subject: Avoid doing type switching in inner functions The CPU profiler tells me that the encoder is spending a lot of time on type switches. --- reencode.go | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index 876af62..393e8c6 100644 --- a/reencode.go +++ b/reencode.go @@ -71,7 +71,7 @@ type ReEncoderConfig struct { func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, - out: out, + out: internal.NewAllWriter(out), } } @@ -85,9 +85,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). type ReEncoder struct { ReEncoderConfig - out io.Writer + out internal.AllWriter - // state: .Write's utf8-decoding buffer + // state: .Write's and .WriteString's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int @@ -119,6 +119,11 @@ type speculation struct { // public API ////////////////////////////////////////////////////////////////// +var ( + _ internal.AllWriter = (*ReEncoder)(nil) + _ io.Closer = (*ReEncoder)(nil) +) + // Write implements io.Writer; it does what you'd expect. // // It is worth noting that Write returns the number of bytes consumed @@ -152,6 +157,38 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { return len(p), nil } +// WriteString implements io.StringWriter; it does what you'd expect, +// but see the notes on the Write method. 
+func (enc *ReEncoder) WriteString(p string) (int, error) { + if len(p) == 0 { + return 0, nil + } + var n int + if enc.bufLen > 0 { + copy(enc.buf[enc.bufLen:], p) + c, size := utf8.DecodeRune(enc.buf[:]) + n += size - enc.bufLen + enc.bufLen = 0 + if _, err := enc.WriteRune(c); err != nil { + return 0, err + } + } + for utf8.FullRuneInString(p[n:]) { + c, size := utf8.DecodeRuneInString(p[n:]) + if _, err := enc.WriteRune(c); err != nil { + return n, err + } + n += size + } + enc.bufLen = copy(enc.buf[:], p[n:]) + return len(p), nil +} + +// WriteByte implements io.ByteWriter; it does what you'd expect. +func (enc *ReEncoder) WriteByte(b byte) error { + return internal.WriteByte(enc, b) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -471,7 +508,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { } func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.out, c) + err := enc.out.WriteByte(c) if err == nil { enc.written++ } @@ -488,12 +525,12 @@ func (enc *ReEncoder) emitNlIndent() error { return err } if enc.Prefix != "" { - if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { return err } } for i := 0; i < enc.handleRuneState.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { return err } } -- cgit v1.2.3-2-g168b From 19f9c9c972c5cfc64de08ba581cc24d96426e73c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 27 Jan 2023 13:44:43 -0700 Subject: reencode: Rethink CompactIfUnder to have linear memory --- reencode.go | 79 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 34 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index 393e8c6..b3f4d20 100644 --- 
a/reencode.go +++ b/reencode.go @@ -35,8 +35,8 @@ type ReEncoderConfig struct { // // Has no affect if Compact is true or Indent is empty. // - // This has O((CompactIfUnder+1)^2) memory overhead, so set - // with caution. + // This has O(2^min(CompactIfUnder, depth)) time overhead, so + // set with caution. CompactIfUnder int // String to use to indent; ignored if Compact is true. @@ -82,7 +82,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. // -// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). +// The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { ReEncoderConfig out internal.AllWriter @@ -111,10 +111,16 @@ type ReEncoder struct { } type speculation struct { - compactFmt ReEncoder - compactBuf bytes.Buffer - indentFmt ReEncoder - indentBuf bytes.Buffer + endWhenStackSize int + fmt ReEncoder + compact bytes.Buffer + buf []inputTuple +} + +type inputTuple struct { + c rune + t internal.RuneType + stackSize int } // public API ////////////////////////////////////////////////////////////////// @@ -207,7 +213,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if err := enc.handleRune(0, internal.RuneTypeError); err != nil { + if err := enc.handleRune(0, internal.RuneTypeError, enc.par.StackSize()); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, @@ -249,7 +255,7 @@ rehandle: } return enc.written, enc.err } - enc.err = enc.handleRune(c, t) + enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { if enc.AllowMultipleValues { enc.par.Reset() @@ -269,7 +275,7 @@ rehandle: // internal //////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { +func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { if enc.CompactIfUnder ==
0 || enc.Compact || enc.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -282,17 +288,20 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { return err } specu := &speculation{ - compactFmt: *enc, - indentFmt: *enc, + endWhenStackSize: stackSize - 1, + fmt: ReEncoder{ + ReEncoderConfig: enc.ReEncoderConfig, + }, } - specu.compactFmt.Compact = true - specu.compactFmt.out = &specu.compactBuf - specu.indentFmt.out = &specu.indentBuf + specu.fmt.Compact = true + specu.fmt.out = &specu.compact enc.handleRuneState.specu = specu - if err := specu.compactFmt.handleRuneMain(c, t); err != nil { - return err - } - if err := specu.indentFmt.handleRuneMain(c, t); err != nil { + enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := specu.fmt.handleRuneMain(c, t); err != nil { return err } default: @@ -301,26 +310,28 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { } } } else { // speculating - - // canCompress is whether we're 1-up from the leaf; - // set this *before* the calls to .handleRune. 
- canCompress := enc.handleRuneState.specu.indentFmt.handleRuneState.specu == nil - - if err := enc.handleRuneState.specu.compactFmt.handleRune(c, t); err != nil { + enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := enc.handleRuneState.specu.fmt.handleRune(c, t, stackSize); err != nil { return err } - if err := enc.handleRuneState.specu.indentFmt.handleRune(c, t); err != nil { - return err - } - switch { - case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent - if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil { + case enc.handleRuneState.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + buf := enc.handleRuneState.specu.buf + enc.handleRuneState.specu = nil + if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } - enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState - case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil { + for _, tuple := range buf[1:] { + if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + return err + } + } + case stackSize == enc.handleRuneState.specu.endWhenStackSize: // stop speculating; use compact + if _, err := enc.handleRuneState.specu.compact.WriteTo(enc.out); err != nil { return err } enc.handleRuneState.lastNonSpace = t -- cgit v1.2.3-2-g168b From bf28b48d23d19990190d5e5aeaee14ea6652a293 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 10:05:06 -0700 Subject: reencode: Wrapping the handleRune state is no longer necessary --- reencode.go | 101 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 49 insertions(+), 52 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go 
b/reencode.go index b3f4d20..d588b1b 100644 --- a/reencode.go +++ b/reencode.go @@ -98,16 +98,13 @@ type ReEncoder struct { inputPos int64 // state: .handleRune - handleRuneState struct { - lastNonSpace internal.RuneType - wasNumber bool - curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings - fracZeros int64 - expZero bool - - specu *speculation - } + lastNonSpace internal.RuneType + wasNumber bool + curIndent int + uhex [4]byte // "\uABCD"-encoded characters in strings + fracZeros int64 + expZero bool + specu *speculation } type speculation struct { @@ -281,7 +278,7 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } // main - if enc.handleRuneState.specu == nil { // not speculating + if enc.specu == nil { // not speculating switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { @@ -295,8 +292,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } specu.fmt.Compact = true specu.fmt.out = &specu.compact - enc.handleRuneState.specu = specu - enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + enc.specu = specu + enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, @@ -310,18 +307,18 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } } } else { // speculating - enc.handleRuneState.specu.buf = append(enc.handleRuneState.specu.buf, inputTuple{ + enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, }) - if err := enc.handleRuneState.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { return err } switch { - case enc.handleRuneState.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent - buf := enc.handleRuneState.specu.buf - enc.handleRuneState.specu = nil + case 
enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + buf := enc.specu.buf + enc.specu = nil if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } @@ -330,13 +327,13 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err return err } } - case stackSize == enc.handleRuneState.specu.endWhenStackSize: // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compact.WriteTo(enc.out); err != nil { + case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact + if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { return err } - enc.handleRuneState.lastNonSpace = t - enc.handleRuneState.curIndent-- - enc.handleRuneState.specu = nil + enc.lastNonSpace = t + enc.curIndent-- + enc.specu = nil } } @@ -358,9 +355,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error // the new rune itself is handled. func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // emit newlines between top-level values - if enc.handleRuneState.lastNonSpace == internal.RuneTypeEOF { + if enc.lastNonSpace == internal.RuneTypeEOF { switch { - case enc.handleRuneState.wasNumber && t.IsNumber(): + case enc.wasNumber && t.IsNumber(): if err := enc.emitByte('\n'); err != nil { return err, false } @@ -374,35 +371,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits case internal.RuneTypeNumberFracDot: - enc.handleRuneState.fracZeros = 0 + enc.fracZeros = 0 case internal.RuneTypeNumberFracDig: - if c == '0' && enc.handleRuneState.lastNonSpace == internal.RuneTypeNumberFracDig { - enc.handleRuneState.fracZeros++ + if c == '0' && enc.lastNonSpace == internal.RuneTypeNumberFracDig { + enc.fracZeros++ return nil, false } fallthrough default: - for enc.handleRuneState.fracZeros > 0 { + for 
enc.fracZeros > 0 { if err := enc.emitByte('0'); err != nil { return err, false } - enc.handleRuneState.fracZeros-- + enc.fracZeros-- } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign: - enc.handleRuneState.expZero = true + enc.expZero = true case internal.RuneTypeNumberExpDig: - if c == '0' && enc.handleRuneState.expZero { + if c == '0' && enc.expZero { return nil, false } - enc.handleRuneState.expZero = false + enc.expZero = false default: - if enc.handleRuneState.expZero { + if enc.expZero { if err := enc.emitByte('0'); err != nil { return err, false } - enc.handleRuneState.expZero = false + enc.expZero = false } } @@ -418,8 +415,8 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // let us manage whitespace, don't pass it through return nil, false case internal.RuneTypeObjectEnd, internal.RuneTypeArrayEnd: - enc.handleRuneState.curIndent-- - switch enc.handleRuneState.lastNonSpace { + enc.curIndent-- + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // collapse default: @@ -428,7 +425,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } } default: - switch enc.handleRuneState.lastNonSpace { + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err, false @@ -440,7 +437,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: - enc.handleRuneState.curIndent++ + enc.curIndent++ } } } @@ -452,7 +449,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { defer func() { if t != internal.RuneTypeSpace { - 
enc.handleRuneState.lastNonSpace = t + enc.lastNonSpace = t } }() @@ -484,33 +481,33 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { panic("should not happen") } case internal.RuneTypeStringEscUA: - enc.handleRuneState.uhex[0], _ = internal.HexToInt(c) + enc.uhex[0], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUB: - enc.handleRuneState.uhex[1], _ = internal.HexToInt(c) + enc.uhex[1], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUC: - enc.handleRuneState.uhex[2], _ = internal.HexToInt(c) + enc.uhex[2], _ = internal.HexToInt(c) return nil case internal.RuneTypeStringEscUD: - enc.handleRuneState.uhex[3], _ = internal.HexToInt(c) + enc.uhex[3], _ = internal.HexToInt(c) c := 0 | - rune(enc.handleRuneState.uhex[0])<<12 | - rune(enc.handleRuneState.uhex[1])<<8 | - rune(enc.handleRuneState.uhex[2])<<4 | - rune(enc.handleRuneState.uhex[3])<<0 + rune(enc.uhex[0])<<12 | + rune(enc.uhex[1])<<8 | + rune(enc.uhex[2])<<4 | + rune(enc.uhex[3])<<0 return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value - enc.handleRuneState.wasNumber = enc.handleRuneState.lastNonSpace.IsNumber() + enc.wasNumber = enc.lastNonSpace.IsNumber() switch { case enc.ForceTrailingNewlines: - t = internal.RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one) + t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) return enc.emitByte('\n') default: - t = internal.RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed + t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed return nil } default: @@ -540,7 +537,7 @@ func (enc *ReEncoder) emitNlIndent() error { return err } } - for i := 0; i < enc.handleRuneState.curIndent; i++ { + 
for i := 0; i < enc.curIndent; i++ { if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { return err } -- cgit v1.2.3-2-g168b From 284be3f68b1eaf2ba693e0a8ae03baa80ebc973f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 10:43:23 -0700 Subject: reencode: Reuse speculation buffers --- reencode.go | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index d588b1b..49d8ddb 100644 --- a/reencode.go +++ b/reencode.go @@ -72,6 +72,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, out: internal.NewAllWriter(out), + specu: new(speculation), } } @@ -108,12 +109,21 @@ type ReEncoder struct { } type speculation struct { + speculating bool endWhenStackSize int fmt ReEncoder compact bytes.Buffer buf []inputTuple } +func (specu *speculation) Reset() { + specu.speculating = false + specu.endWhenStackSize = 0 + specu.fmt = ReEncoder{} + specu.compact.Reset() + specu.buf = specu.buf[:0] +} + type inputTuple struct { c rune t internal.RuneType @@ -278,27 +288,25 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } // main - if enc.specu == nil { // not speculating + if !enc.specu.speculating { // not speculating switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { return err } - specu := &speculation{ - endWhenStackSize: stackSize - 1, - fmt: ReEncoder{ - ReEncoderConfig: enc.ReEncoderConfig, - }, + enc.specu.speculating = true + enc.specu.endWhenStackSize = stackSize - 1 + enc.specu.fmt = ReEncoder{ + ReEncoderConfig: enc.ReEncoderConfig, + out: &enc.specu.compact, } - specu.fmt.Compact = true - specu.fmt.out = &specu.compact - enc.specu = specu + enc.specu.fmt.Compact = true enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, stackSize: stackSize, }) - if 
err := specu.fmt.handleRuneMain(c, t); err != nil { + if err := enc.specu.fmt.handleRuneMain(c, t); err != nil { return err } default: @@ -317,8 +325,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err } switch { case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent - buf := enc.specu.buf - enc.specu = nil + buf := append([]inputTuple(nil), enc.specu.buf...) + enc.specu.Reset() if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } @@ -331,9 +339,9 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { return err } + enc.specu.Reset() enc.lastNonSpace = t enc.curIndent-- - enc.specu = nil } } -- cgit v1.2.3-2-g168b From 659114fee7c39f06c408135169f2848a881dfe5e Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 28 Jan 2023 22:43:32 -0700 Subject: reencode: I see handleRuneMain's defer handling showing up in the CPU profile --- reencode.go | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index 49d8ddb..a5dc3c8 100644 --- a/reencode.go +++ b/reencode.go @@ -455,48 +455,40 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // handleRuneMain handles the new rune itself, not buffered things. 
func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { - defer func() { - if t != internal.RuneTypeSpace { - enc.lastNonSpace = t - } - }() - + var err error switch t { case internal.RuneTypeStringChar: - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: - return nil + // do nothing case internal.RuneTypeStringEsc1: switch c { case '"': - return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': - return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': - return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': - return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': - return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': - return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': - return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': - return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) 
+ err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } case internal.RuneTypeStringEscUA: enc.uhex[0], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUB: enc.uhex[1], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUC: enc.uhex[2], _ = internal.HexToInt(c) - return nil case internal.RuneTypeStringEscUD: enc.uhex[3], _ = internal.HexToInt(c) c := 0 | @@ -504,7 +496,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { rune(enc.uhex[1])<<8 | rune(enc.uhex[2])<<4 | rune(enc.uhex[3])<<0 - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -513,14 +505,18 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { switch { case enc.ForceTrailingNewlines: t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - return enc.emitByte('\n') + err = enc.emitByte('\n') default: t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed - return nil } default: - return enc.emitByte(byte(c)) + err = enc.emitByte(byte(c)) } + + if t != internal.RuneTypeSpace { + enc.lastNonSpace = t + } + return err } func (enc *ReEncoder) emitByte(c byte) error { -- cgit v1.2.3-2-g168b From b3f4186f2b8e992f56f898784b1cd28bfd7550ca Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 20:59:37 -0700 Subject: Invent "barriers" instead of nesting parsers --- reencode.go | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index a5dc3c8..eae80db 100644 --- a/reencode.go +++ b/reencode.go @@ -106,6 +106,9 @@ type ReEncoder struct { fracZeros int64 expZero bool specu *speculation + + 
// state: .pushBarrier and .popBarrier + stackInputPos []int64 } type speculation struct { @@ -227,7 +230,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() } return nil @@ -264,7 +267,7 @@ rehandle: } enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() goto rehandle } else { @@ -280,6 +283,20 @@ rehandle: return enc.written, enc.err } +// semi-public API ///////////////////////////////////////////////////////////// + +func (enc *ReEncoder) pushWriteBarrier() { + enc.par.PushWriteBarrier() + enc.stackInputPos = append(enc.stackInputPos, enc.inputPos) + enc.inputPos = 0 +} + +func (enc *ReEncoder) popWriteBarrier() { + enc.par.PopBarrier() + enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1] + enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1] +} + // internal //////////////////////////////////////////////////////////////////// func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { @@ -503,7 +520,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines: + case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0: t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) err = enc.emitByte('\n') default: -- cgit v1.2.3-2-g168b