diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-30 21:58:51 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-30 21:58:51 -0700 |
commit | ab321d3f90b9a1b4c00b04be26867d9a03809259 (patch) | |
tree | 7f4a64fa5365ffd3b285f921c5f905754b3883b3 /reencode.go | |
parent | d473f861a5c3a3112c83518eafbcda50e274b182 (diff) | |
parent | cbf8ec9ae3212e9642385c034fe0b0846af6dfd0 (diff) |
Merge branch 'lukeshu/break'
Diffstat (limited to 'reencode.go')
-rw-r--r-- | reencode.go | 96 |
1 files changed, 56 insertions, 40 deletions
diff --git a/reencode.go b/reencode.go index 6b9c336..876af62 100644 --- a/reencode.go +++ b/reencode.go @@ -13,29 +13,8 @@ import ( "git.lukeshu.com/go/lowmemjson/internal" ) -type speculation struct { - compactFmt ReEncoder - compactBuf bytes.Buffer - indentFmt ReEncoder - indentBuf bytes.Buffer -} - -// A ReEncoder takes a stream of JSON elements (by way of implementing -// io.Writer and WriteRune), and re-encodes the JSON, writing it to -// the .Out member. -// -// This is useful for prettifying, minifying, sanitizing, and/or -// validating JSON. -// -// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). -type ReEncoder struct { - // The output stream to write the re-encoded JSON to. - // - // A ReEncoder tends to make many small writes; if Out.Write - // calls are syscalls, then you may want to wrap Out in a - // bufio.Writer. - Out io.Writer - +// A ReEncoderConfig controls how a ReEncoder should behave. +type ReEncoderConfig struct { // A JSON document is specified to be a single JSON element; // but it is often desirable to handle streams of multiple // JSON elements. @@ -49,6 +28,7 @@ type ReEncoder struct { // // Trims superfluous 0s from numbers. Compact bool + // CompactIfUnder causes the *ReEncoder to behave as if // Compact=true for individual elements if doing so would // cause that element to be under this number of bytes. @@ -58,18 +38,22 @@ type ReEncoder struct { // This has O((CompactIfUnder+1)^2) memory overhead, so set // with caution. CompactIfUnder int + // String to use to indent; ignored if Compact is true. // // Newlines are emitted *between* top-level values; a newline is // not emitted after the *last* top-level value (unless // ForceTrailingNewlines is on). Indent string + // String to put before indents. Prefix string + // Whether to emit a newline after each top-level value. See // the comments on Compact and Indent for discussion of how // this is different than the usual behavior. 
ForceTrailingNewlines bool + // Returns whether a given character in a string should be // backslash-escaped. The bool argument is whether it was // \u-escaped in the input. This does not affect characters @@ -77,6 +61,31 @@ type ReEncoder struct { // // If not set, then EscapeDefault is used. BackslashEscape BackslashEscaper +} + +// NewReEncoder returns a new ReEncoder instance. +// +// A ReEncoder tends to make many small writes; if out.Write +// calls are syscalls, then you may want to wrap out in a +// bufio.Writer. +func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { + return &ReEncoder{ + ReEncoderConfig: cfg, + out: out, + } +} + +// A ReEncoder takes a stream of JSON elements (by way of implementing +// io.Writer and WriteRune), and re-encodes the JSON, writing it to +// the io.Writer passed to NewReEncoder. +// +// This is useful for prettifying, minifying, sanitizing, and/or +// validating JSON. +// +// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). +type ReEncoder struct { + ReEncoderConfig + out io.Writer // state: .Write's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -101,6 +110,13 @@ type ReEncoder struct { } } +type speculation struct { + compactFmt ReEncoder + compactBuf bytes.Buffer + indentFmt ReEncoder + indentBuf bytes.Buffer +} + // public API ////////////////////////////////////////////////////////////////// // Write implements io.Writer; it does what you'd expect. 
@@ -233,8 +249,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { indentFmt: *enc, } specu.compactFmt.Compact = true - specu.compactFmt.Out = &specu.compactBuf - specu.indentFmt.Out = &specu.indentBuf + specu.compactFmt.out = &specu.compactBuf + specu.indentFmt.out = &specu.indentBuf enc.handleRuneState.specu = specu if err := specu.compactFmt.handleRuneMain(c, t); err != nil { return err @@ -262,12 +278,12 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { switch { case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent - if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.Out); err != nil { + if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil { return err } enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.Out); err != nil { + if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil { return err } enc.handleRuneState.lastNonSpace = t @@ -395,27 +411,27 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { switch t { case internal.RuneTypeStringChar: - return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: return nil case internal.RuneTypeStringEsc1: switch c { case '"': - return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': - return enc.emit(writeStringChar(enc.Out, '\\', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\\', 
BackslashEscapeShort, enc.BackslashEscape)) case '/': - return enc.emit(writeStringChar(enc.Out, '/', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': - return enc.emit(writeStringChar(enc.Out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': - return enc.emit(writeStringChar(enc.Out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': - return enc.emit(writeStringChar(enc.Out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': - return enc.emit(writeStringChar(enc.Out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': - return enc.emit(writeStringChar(enc.Out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } @@ -435,7 +451,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { rune(enc.handleRuneState.uhex[1])<<8 | rune(enc.handleRuneState.uhex[2])<<4 | rune(enc.handleRuneState.uhex[3])<<0 - return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -455,7 +471,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { } func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.Out, c) + err := writeByte(enc.out, c) if err == nil { enc.written++ } @@ -472,12 +488,12 @@ func (enc 
*ReEncoder) emitNlIndent() error { return err } if enc.Prefix != "" { - if err := enc.emit(io.WriteString(enc.Out, enc.Prefix)); err != nil { + if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil { return err } } for i := 0; i < enc.handleRuneState.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil { + if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil { return err } } |