diff options
Diffstat (limited to 'reencode.go')
-rw-r--r-- | reencode.go | 181 |
1 files changed, 76 insertions, 105 deletions
diff --git a/reencode.go b/reencode.go index d19dc1a..f18888c 100644 --- a/reencode.go +++ b/reencode.go @@ -12,6 +12,7 @@ import ( "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -71,32 +72,31 @@ type ReEncoderConfig struct { // bufio.Writer. func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ - ReEncoderConfig: cfg, - out: fastio.NewAllWriter(out), - specu: new(speculation), + cfg: cfg, + out: fastio.NewAllWriter(out), + specu: new(speculation), } } // A ReEncoder takes a stream of JSON elements (by way of implementing -// io.Writer and WriteRune), and re-encodes the JSON, writing it to -// the .Out member. +// io.Writer, io.StringWriter, io.ByteWriter, and WriteRune), and +// re-encodes the JSON, writing it to the .Out member. // // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. // // The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { - ReEncoderConfig + cfg ReEncoderConfig out fastio.AllWriter - // state: .Write's and .WriteString's utf8-decoding buffer + // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int - // state: .WriteRune + // state: contract between the public API and .handleRune err error par jsonparse.Parser - written int inputPos int64 // state: .handleRune @@ -104,7 +104,7 @@ type ReEncoder struct { lastNonSpaceNonEOF jsonparse.RuneType wasNumber bool curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings + uhex [3]byte // "\uABCD"-encoded characters in strings fracZeros int64 expZero bool specu *speculation @@ -165,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRune(p[n:]) { c, size := utf8.DecodeRune(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } @@ -192,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRuneInString(p[n:]) { c, size := utf8.DecodeRuneInString(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } - enc.bufLen = copy(enc.buf[:], p[n:]) return len(p), nil } @@ -212,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error { return fastio.WriteByte(enc, b) } +// WriteRune does what you'd expect. +func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { + return fastio.WriteRune(enc, c) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -231,40 +239,21 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } return enc.err } - if enc.AllowMultipleValues { + if enc.cfg.AllowMultipleValues { enc.par.Reset() } } return nil } -// WriteRune writes a single Unicode code point, returning the number -// of bytes written to the output stream and any error. -// -// Even when there is no error, the number of bytes written may be -// zero (for example, when the rune is whitespace and the ReEncoder is -// minifying the JSON), or it may be substantially longer than one -// code point's worth (for example, when `\uXXXX` escaping a character -// in a string, or when outputing extra whitespace when the ReEncoder -// is prettifying the JSON). -func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { - if enc.err != nil { - return 0, enc.err - } - if enc.bufLen > 0 { - enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen]) - return 0, enc.err - } - - enc.written = 0 - +func (enc *ReEncoder) handleRune(c rune) { rehandle: t, err := enc.par.HandleRune(c) if err != nil { @@ -272,17 +261,17 @@ rehandle: Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } - if err := enc.handleRune(c, t, enc.stackSize()); err != nil { + if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } if t == jsonparse.RuneTypeEOF { - if enc.AllowMultipleValues && len(enc.barriers) == 0 { + if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle } else { @@ -290,12 +279,11 @@ rehandle: Err: fmt.Errorf("invalid character %q after top-level value", c), Offset: enc.inputPos, } - return enc.written, enc.err + return } } enc.inputPos += int64(utf8.RuneLen(c)) - return enc.written, enc.err } // semi-public API ///////////////////////////////////////////////////////////// @@ -326,8 +314,8 @@ func (enc *ReEncoder) stackSize() int { return sz } -func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { - if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { +func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { + if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -341,10 +329,10 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er enc.specu.speculating = true enc.specu.endWhenStackSize = stackSize - 1 enc.specu.fmt = ReEncoder{ - ReEncoderConfig: enc.ReEncoderConfig, - out: &enc.specu.compact, + cfg: enc.cfg, + out: &enc.specu.compact, } - enc.specu.fmt.Compact = true + enc.specu.fmt.cfg.Compact = true enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, @@ -364,18 +352,18 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er t: t, stackSize: stackSize, }) - if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { return err } switch { - case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent buf := append([]inputTuple(nil), enc.specu.buf...) enc.specu.Reset() if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } for _, tuple := range buf[1:] { - if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { return err } } @@ -410,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) if enc.lastNonSpace == jsonparse.RuneTypeEOF { switch { case enc.wasNumber && t.IsNumber(): - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } - case enc.Indent != "" && !enc.Compact: - if err := enc.emitByte('\n'); err != nil { + case enc.cfg.Indent != "" && !enc.cfg.Compact: + if err := enc.out.WriteByte('\n'); err != nil { return err, false } } @@ -432,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) fallthrough default: for enc.fracZeros > 0 { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.fracZeros-- @@ -448,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) enc.expZero = false default: if enc.expZero { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.expZero = false @@ -457,11 +445,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // whitespace switch { - case enc.Compact: + case enc.cfg.Compact: if t == jsonparse.RuneTypeSpace { return nil, false } - case enc.Indent != "": + case enc.cfg.Indent != "": switch t { case jsonparse.RuneTypeSpace: // let us manage whitespace, don't pass it through @@ -483,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) return err, false } case jsonparse.RuneTypeObjectColon: - if err := enc.emitByte(' '); err != nil { + if err := enc.out.WriteByte(' '); err != nil { return err, false } } @@ -499,62 +487,58 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { + escaper := enc.cfg.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } var err error switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: switch c { - case '"': - err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) - case '\\': - err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) - case '/': - err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + case '"', '\\', '/': + // self case 'b': - err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + c = '\b' case 'f': - err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + c = '\f' case 'n': - err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + c = '\n' case 'r': - err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + c = '\r' case 't': - err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + c = '\t' default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) case jsonparse.RuneTypeStringEscUA: - enc.uhex[0], _ = jsonparse.HexToInt(c) + enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - enc.uhex[1], _ = jsonparse.HexToInt(c) + enc.uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - enc.uhex[2], _ = jsonparse.HexToInt(c) + enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - enc.uhex[3], _ = jsonparse.HexToInt(c) - c := 0 | - rune(enc.uhex[0])<<12 | - rune(enc.uhex[1])<<8 | - rune(enc.uhex[2])<<4 | - rune(enc.uhex[3])<<0 - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines && len(enc.barriers) == 0: + case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.emitByte('\n') + err = enc.out.WriteByte('\n') default: t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed } default: - err = enc.emitByte(byte(c)) + err = enc.out.WriteByte(byte(c)) } if t != jsonparse.RuneTypeSpace { @@ -566,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { return err } -func (enc *ReEncoder) emitByte(c byte) error { - err := enc.out.WriteByte(c) - if err == nil { - enc.written++ - } - return err -} - -func (enc *ReEncoder) emit(n int, err error) error { - enc.written += n - return err -} - func (enc *ReEncoder) emitNlIndent() error { - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err } - if enc.Prefix != "" { - if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { + if enc.cfg.Prefix != "" { + if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { return err } } |