From d19e2c6884c2d409fcc828c870f1839ee84f38cb Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 11:44:36 -0700 Subject: reencode: Factor into separate modules --- reencode.go | 335 ++++++++++++++---------------------------------------------- 1 file changed, 74 insertions(+), 261 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index d8cdb71..7e9b5ff 100644 --- a/reencode.go +++ b/reencode.go @@ -5,14 +5,12 @@ package lowmemjson import ( - "bytes" "fmt" "io" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" - "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -71,10 +69,57 @@ type ReEncoderConfig struct { // calls are syscalls, then you may want to wrap Out in a // bufio.Writer. func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { + var module reEncoderModule + + // Basic + module = &reEncodeWrite{ + out: fastio.NewAllWriter(out), + } + + // Whitespace + if cfg.ForceTrailingNewlines { + module = &reEncodeForceNL{ + out: module, + } + } + switch { + case cfg.Compact: + module = &reEncodeCompactWS{ + out: module, + } + case cfg.Indent != "": + if cfg.CompactIfUnder > 0 { + module = &reEncodeCompactWSIfUnder{ + out: module, + CompactWSIfUnder: cfg.CompactIfUnder, + } + } + module = &reEncodeIndent{ + out: module, + Indent: cfg.Indent, + Prefix: cfg.Prefix, + } + } + + // Numbers + module = &reEncodeCompactNum{ + out: module, + } + + // Strings + escaper := cfg.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } + module = &reEncodeString{ + out: module, + BackslashEscape: escaper, + } + return &ReEncoder{ - cfg: cfg, - out: fastio.NewAllWriter(out), - specu: new(speculation), + out: module, + esc: escaper, + allowMultipleValues: cfg.AllowMultipleValues, } } @@ -87,8 +132,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // // The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { - cfg ReEncoderConfig - out fastio.AllWriter + out reEncoderModule + esc BackslashEscaper + allowMultipleValues bool // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -99,18 +145,11 @@ type ReEncoder struct { par jsonparse.Parser inputPos int64 - // state: .handleRune - lastNonSpace jsonparse.RuneType - lastNonSpaceNonEOF jsonparse.RuneType - wasNumber bool - curIndent int - uhex [3]byte // "\uABCD"-encoded characters in strings - fracZeros int64 - expZero bool - specu *speculation - // state: .pushWriteBarrier and .popWriteBarrier barriers []barrier + + // state: .handleRuneType + uhex [3]byte // "\uABCD"-encoded characters in strings } type barrier struct { @@ -118,26 +157,9 @@ type barrier struct { stackSize int } -type speculation struct { - speculating bool - endWhenStackSize int - fmt ReEncoder - compact bytes.Buffer - buf []inputTuple -} - -func (specu *speculation) Reset() { - specu.speculating = false - specu.endWhenStackSize = 0 - specu.fmt = ReEncoder{} - specu.compact.Reset() - specu.buf = specu.buf[:0] -} - -type inputTuple struct { - c rune - t jsonparse.RuneType - stackSize int +type reEncoderModule interface { + HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error + PopWriteBarrier() } // public API ////////////////////////////////////////////////////////////////// @@ -239,14 +261,14 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeEOF, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } return enc.err } - if enc.cfg.AllowMultipleValues { + if enc.allowMultipleValues { enc.par.Reset() } } @@ -271,7 +293,7 @@ rehandle: return } if t == jsonparse.RuneTypeEOF { - if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { + if enc.allowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle } else { @@ -301,7 +323,7 @@ func (enc *ReEncoder) popWriteBarrier() { enc.par.PopBarrier() enc.inputPos += enc.barriers[len(enc.barriers)-1].inputPos enc.barriers = enc.barriers[:len(enc.barriers)-1] - enc.lastNonSpace = enc.lastNonSpaceNonEOF + enc.out.PopWriteBarrier() } // internal //////////////////////////////////////////////////////////////////// @@ -315,189 +337,9 @@ func (enc *ReEncoder) stackSize() int { } func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { - if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { - return enc.handleRuneNoSpeculation(c, t) - } - - // main - if !enc.specu.speculating { // not speculating - switch t { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating - if err, _ := enc.handleRunePre(c, t); err != nil { - return err - } - enc.specu.speculating = true - enc.specu.endWhenStackSize = stackSize - 1 - enc.specu.fmt = ReEncoder{ - cfg: enc.cfg, - out: &enc.specu.compact, - } - enc.specu.fmt.cfg.Compact = true - enc.specu.buf = append(enc.specu.buf, inputTuple{ - c: c, - t: t, - stackSize: stackSize, - }) - if err := enc.specu.fmt.handleRuneMain(c, t); err != nil { - return err - } - default: - if err := enc.handleRuneNoSpeculation(c, t); err != nil { - return err - } - } - } else { // speculating - enc.specu.buf = append(enc.specu.buf, inputTuple{ - c: c, - t: t, - stackSize: stackSize, - }) - if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { - return err - } - switch { - case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent - buf := append([]inputTuple(nil), enc.specu.buf...) - enc.specu.Reset() - if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { - return err - } - for _, tuple := range buf[1:] { - if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { - return err - } - } - case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact - if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { - return err - } - enc.specu.Reset() - enc.lastNonSpace = t - enc.curIndent-- - } - } - - return nil -} - -func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t jsonparse.RuneType) error { - err, shouldHandle := enc.handleRunePre(c, t) - if err != nil { - return err - } - if !shouldHandle { - return nil - } - return enc.handleRuneMain(c, t) -} - -// handleRunePre handles buffered things that need to happen before -// the new rune itself is handled. -func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) { - // emit newlines between top-level values - if enc.lastNonSpace == jsonparse.RuneTypeEOF { - switch { - case enc.wasNumber && t.IsNumber(): - if err := enc.out.WriteByte('\n'); err != nil { - return err, false - } - case enc.cfg.Indent != "" && !enc.cfg.Compact: - if err := enc.out.WriteByte('\n'); err != nil { - return err, false - } - } - } - - // shorten numbers - switch t { // trim trailing '0's from the fraction-part, but don't remove all digits - case jsonparse.RuneTypeNumberFracDot: - enc.fracZeros = 0 - case jsonparse.RuneTypeNumberFracDig: - if c == '0' && enc.lastNonSpace == jsonparse.RuneTypeNumberFracDig { - enc.fracZeros++ - return nil, false - } - fallthrough - default: - for enc.fracZeros > 0 { - if err := enc.out.WriteByte('0'); err != nil { - return err, false - } - enc.fracZeros-- - } - } - switch t { // trim leading '0's from the exponent-part, but don't remove all digits - case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign: - enc.expZero = true - case jsonparse.RuneTypeNumberExpDig: - if c == '0' && enc.expZero { - return nil, false - } - enc.expZero = false - default: - if enc.expZero { - if err := enc.out.WriteByte('0'); err != nil { - return err, false - } - enc.expZero = false - } - } - - // whitespace - switch { - case enc.cfg.Compact: - if t == jsonparse.RuneTypeSpace { - return nil, false - } - case enc.cfg.Indent != "": - switch t { - case jsonparse.RuneTypeSpace: - // let us manage whitespace, don't pass it through - return nil, false - case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd: - enc.curIndent-- - switch enc.lastNonSpace { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: - // collapse - default: - if err := enc.emitNlIndent(); err != nil { - return err, false - } - } - default: - switch enc.lastNonSpace { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma: - if err := enc.emitNlIndent(); err != nil { - return err, false - } - case jsonparse.RuneTypeObjectColon: - if err := enc.out.WriteByte(' '); err != nil { - return err, false - } - } - switch t { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: - enc.curIndent++ - } - } - } - - return nil, true -} - -// handleRuneMain handles the new rune itself, not buffered things. -func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { - escaper := enc.cfg.BackslashEscape - if escaper == nil { - escaper = EscapeDefault - } - var err error switch t { - - case jsonparse.RuneTypeStringChar: - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: - // do nothing + return nil case jsonparse.RuneTypeStringEsc1: switch c { case '"', '\\', '/': @@ -515,54 +357,25 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) + return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeShort, stackSize) case jsonparse.RuneTypeStringEscUA: enc.uhex[0] = byte(c) + return nil case jsonparse.RuneTypeStringEscUB: enc.uhex[1] = byte(c) + return nil case jsonparse.RuneTypeStringEscUC: enc.uhex[2] = byte(c) + return nil case jsonparse.RuneTypeStringEscUD: c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) - - case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() - fallthrough - case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value - enc.wasNumber = enc.lastNonSpace.IsNumber() - switch { - case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: - t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.out.WriteByte('\n') - default: - t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed - } + return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeUnicode, stackSize) + case jsonparse.RuneTypeError: + panic(fmt.Errorf("should not happen: handleRune called with %#v", t)) default: - err = enc.out.WriteByte(byte(c)) - } - - if t != jsonparse.RuneTypeSpace { - enc.lastNonSpace = t - if t != jsonparse.RuneTypeEOF { - enc.lastNonSpaceNonEOF = t - } - } - return err -} - -func (enc *ReEncoder) emitNlIndent() error { - if err := enc.out.WriteByte('\n'); err != nil { - return err - } - if enc.cfg.Prefix != "" { - if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { - return err + if t > jsonparse.RuneTypeEOF { + panic(fmt.Errorf("should not happen: handleRune called with %#v", t)) } + return enc.out.HandleRune(c, t, BackslashEscapeNone, stackSize) } - for i := 0; i < enc.curIndent; i++ { - if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { - return err - } - } - return nil } -- cgit v1.2.3-2-g168b