From cbf8ec9ae3212e9642385c034fe0b0846af6dfd0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 27 Jan 2023 13:12:35 -0700 Subject: BREAKING CHANGE: reencode: Separate config from state --- README.md | 21 +++------- ReleaseNotes.md | 22 +++++++++++ compat/json/compat.go | 38 ++++++++---------- compat/json/compat_test.go | 7 ++-- encode.go | 9 ++--- encode_test.go | 5 +-- reencode.go | 96 +++++++++++++++++++++++++++------------------- reencode_test.go | 14 +++---- 8 files changed, 113 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index fcb46fa..108f3dc 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ those types should decode identically with `lowmemjson`. Given types that encode as desired with `encoding/json`, those types should encode identically with `lowmemjson` (assuming an appropriately configured `ReEncoder` to match the whitespace-handling and special-character -escaping; a `ReEncoder` with `Compact=true` and all other settings -left as zero will match the behavior of `json.Marshal`). +escaping; a `ReEncoderConfig` with `Compact=true` and all other +settings left as zero will match the behavior of `json.Marshal`). For better memory usage: - Instead of implementing [`json.Marshaler`][], consider implementing @@ -95,6 +95,7 @@ types that go with it: + `type EncodeMethodError` 3. `type ReEncoder` + + `type ReEncoderConfig` + `type ReEncodeSyntaxError` + `type BackslashEscaper` * `type BackslashEscapeMode` @@ -108,25 +109,13 @@ A `*ReEncoder` handles transforming a JSON stream; this is useful for prettifying, minifying, sanitizing, and/or validating JSON. A `*ReEncoder` wraps an `io.Writer`, itself implementing `io.Writer`. The most common use of it will be something along the lines of - -```go -out = &ReEncoder{ - Out: out, - // settings here -} -``` +`out = lowmemjson.NewReEncoder(out, lowmemjson.ReEncoderConfig{…})`. An `*Encoder` handles encoding Go values into a JSON stream. 
`*Encoder` doesn't take much care in to making its output nice; so it is usually desirable to have the output stream of an `*Encoder` be a `*ReEncoder`; the most common use of it will be - -```go -lowmemjson.NewEncoder(&lowmemjson.ReEncoder{ - Out: out, - // settings here -}).Encode(val) -``` +`lowmemjson.NewEncoder(lowmemjson.NewReEncoder(out, lowmemjson.ReEncoderConfig{…})).Encode(val)`. `*Encoder` and `*ReEncoder` both tend to make many small writes; if writes are syscalls, you may want to wrap their output in a diff --git a/ReleaseNotes.md b/ReleaseNotes.md index e4327db..874aeec 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,3 +1,25 @@ +# v0.3.0 (TBD) + + Theme: Breaking changes + + This release makes a breaking change to the way *ReEncoder works. + This change both better fits what's convenient to use, and enables + making future performance improvements. + + Breaking changes: + + - ReEncoder: Instead of instantiating a `*ReEncoder` with + + ```go + reenc := &lowmemjson.ReEncoder{Out: w, settings} + ``` + + it is now instantiated with + + ```go + reenc := lowmemjson.NewReEncoder(w, lowmemjson.ReEncoderConfig{settings}) + ``` + # v0.2.1 (2023-01-30) Theme: Code quality diff --git a/compat/json/compat.go b/compat/json/compat.go index 48d708b..688b35c 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -50,45 +50,42 @@ func convertEncodeError(err error) error { return err } -func marshal(v any, formatter *lowmemjson.ReEncoder) ([]byte, error) { +func marshal(v any, cfg lowmemjson.ReEncoderConfig) ([]byte, error) { var buf bytes.Buffer - formatter.Out = &buf - if err := convertEncodeError(lowmemjson.NewEncoder(formatter).Encode(v)); err != nil { + if err := convertEncodeError(lowmemjson.NewEncoder(lowmemjson.NewReEncoder(&buf, cfg)).Encode(v)); err != nil { return nil, err } return buf.Bytes(), nil } func MarshalIndent(v any, prefix, indent string) ([]byte, error) { - return marshal(v, &lowmemjson.ReEncoder{ + return marshal(v, 
lowmemjson.ReEncoderConfig{ Indent: indent, Prefix: prefix, }) } func Marshal(v any) ([]byte, error) { - return marshal(v, &lowmemjson.ReEncoder{ + return marshal(v, lowmemjson.ReEncoderConfig{ Compact: true, }) } type Encoder struct { encoder *lowmemjson.Encoder - formatter lowmemjson.ReEncoder + formatter *lowmemjson.ReEncoder } func NewEncoder(w io.Writer) *Encoder { ret := &Encoder{ - formatter: lowmemjson.ReEncoder{ - Out: w, - + formatter: lowmemjson.NewReEncoder(w, lowmemjson.ReEncoderConfig{ AllowMultipleValues: true, Compact: true, ForceTrailingNewlines: true, - }, + }), } - ret.encoder = lowmemjson.NewEncoder(&ret.formatter) + ret.encoder = lowmemjson.NewEncoder(ret.formatter) return ret } @@ -126,13 +123,11 @@ func convertReEncodeError(err error) error { } func HTMLEscape(dst *bytes.Buffer, src []byte) { - formatter := &lowmemjson.ReEncoder{ - Out: dst, - } - _, _ = formatter.Write(src) + _, _ = lowmemjson.NewReEncoder(dst, lowmemjson.ReEncoderConfig{}).Write(src) } -func reencode(src []byte, formatter *lowmemjson.ReEncoder) error { +func reencode(dst io.Writer, src []byte, cfg lowmemjson.ReEncoderConfig) error { + formatter := lowmemjson.NewReEncoder(dst, cfg) _, err := formatter.Write(src) if err == nil { err = formatter.Close() @@ -141,16 +136,14 @@ func reencode(src []byte, formatter *lowmemjson.ReEncoder) error { } func Compact(dst *bytes.Buffer, src []byte) error { - return reencode(src, &lowmemjson.ReEncoder{ - Out: dst, + return reencode(dst, src, lowmemjson.ReEncoderConfig{ Compact: true, BackslashEscape: lowmemjson.EscapePreserve, }) } func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { - return reencode(src, &lowmemjson.ReEncoder{ - Out: dst, + return reencode(dst, src, lowmemjson.ReEncoderConfig{ Indent: indent, Prefix: prefix, BackslashEscape: lowmemjson.EscapePreserve, @@ -158,10 +151,9 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { } func Valid(data []byte) bool { - formatter := 
&lowmemjson.ReEncoder{ - Out: io.Discard, + formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{ Compact: true, - } + }) _, err := formatter.Write(data) return err == nil } diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index dac5e54..feb850b 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -14,11 +14,10 @@ import ( var parseTag = internal.ParseTag -type scanner = lowmemjson.ReEncoder +type scanner = lowmemjson.ReEncoderConfig -func checkValid(in []byte, scan *lowmemjson.ReEncoder) error { - scan.Out = io.Discard - return reencode(in, scan) +func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { + return reencode(io.Discard, in, *scan) } func isValidNumber(s string) bool { diff --git a/encode.go b/encode.go index 00848ed..e9c7ac6 100644 --- a/encode.go +++ b/encode.go @@ -71,10 +71,9 @@ type Encoder struct { func NewEncoder(w io.Writer) *Encoder { re, ok := w.(*ReEncoder) if !ok { - re = &ReEncoder{ - Out: w, + re = NewReEncoder(w, ReEncoderConfig{ AllowMultipleValues: true, - } + }) } return &Encoder{ w: re, @@ -137,7 +136,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool return } // Use a sub-ReEncoder to check that it's a full element. - validator := &ReEncoder{Out: w, BackslashEscape: escaper} + validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) if err := obj.EncodeJSON(validator); err != nil { panic(encodeError{&EncodeMethodError{ Type: val.Type(), @@ -175,7 +174,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool }}) } // Use a sub-ReEncoder to check that it's a full element. 
- validator := &ReEncoder{Out: w, BackslashEscape: escaper} + validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) if _, err := validator.Write(dat); err != nil { panic(encodeError{&EncodeMethodError{ Type: val.Type(), diff --git a/encode_test.go b/encode_test.go index df12582..edd8af1 100644 --- a/encode_test.go +++ b/encode_test.go @@ -27,11 +27,10 @@ func TestEncoder(t *testing.T) { func TestEncoderIndent(t *testing.T) { t.Parallel() var out strings.Builder - enc := NewEncoder(&ReEncoder{ - Out: &out, + enc := NewEncoder(NewReEncoder(&out, ReEncoderConfig{ AllowMultipleValues: true, Indent: "\t", - }) + })) assert.NoError(t, enc.Encode(1)) assert.NoError(t, enc.Encode(2)) diff --git a/reencode.go b/reencode.go index 6b9c336..876af62 100644 --- a/reencode.go +++ b/reencode.go @@ -13,29 +13,8 @@ import ( "git.lukeshu.com/go/lowmemjson/internal" ) -type speculation struct { - compactFmt ReEncoder - compactBuf bytes.Buffer - indentFmt ReEncoder - indentBuf bytes.Buffer -} - -// A ReEncoder takes a stream of JSON elements (by way of implementing -// io.Writer and WriteRune), and re-encodes the JSON, writing it to -// the .Out member. -// -// This is useful for prettifying, minifying, sanitizing, and/or -// validating JSON. -// -// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). -type ReEncoder struct { - // The output stream to write the re-encoded JSON to. - // - // A ReEncoder tends to make many small writes; if Out.Write - // calls are syscalls, then you may want to wrap Out in a - // bufio.Writer. - Out io.Writer - +// A ReEncoderConfig controls how a ReEncoder should behave. +type ReEncoderConfig struct { // A JSON document is specified to be a single JSON element; // but it is often desirable to handle streams of multiple // JSON elements. @@ -49,6 +28,7 @@ type ReEncoder struct { // // Trims superflous 0s from numbers. 
Compact bool + // CompactIfUnder causes the *ReEncoder to behave as if // Compact=true for individual elements if doing so would // cause that element to be under this number of bytes. @@ -58,18 +38,22 @@ type ReEncoder struct { // This has O((CompactIfUnder+1)^2) memory overhead, so set // with caution. CompactIfUnder int + // String to use to indent; ignored if Compact is true. // // Newlines are emitted *between* top-level values; a newline is // not emitted after the *last* top-level value (unless // ForceTrailingNewlines is on). Indent string + // String to put before indents. Prefix string + // Whether to emit a newline after each top-level value. See // the comments on Compact and Indent for discussion of how // this is different than the usual behavior. ForceTrailingNewlines bool + // Returns whether a given character in a string should be // backslash-escaped. The bool argument is whether it was // \u-escaped in the input. This does not affect characters @@ -77,6 +61,31 @@ type ReEncoder struct { // // If not set, then EscapeDefault is used. BackslashEscape BackslashEscaper +} + +// NewReEncoder returns a new ReEncoder instance. +// +// A ReEncoder tends to make many small writes; if out.Write +// calls are syscalls, then you may want to wrap out in a +// bufio.Writer. +func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { + return &ReEncoder{ + ReEncoderConfig: cfg, + out: out, + } +} + +// A ReEncoder takes a stream of JSON elements (by way of implementing +// io.Writer and WriteRune), and re-encodes the JSON, writing it to +// the io.Writer that was passed to NewReEncoder. +// +// This is useful for prettifying, minifying, sanitizing, and/or +// validating JSON. +// +// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). 
+type ReEncoder struct { + ReEncoderConfig + out io.Writer // state: .Write's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -101,6 +110,13 @@ type ReEncoder struct { } } +type speculation struct { + compactFmt ReEncoder + compactBuf bytes.Buffer + indentFmt ReEncoder + indentBuf bytes.Buffer +} + // public API ////////////////////////////////////////////////////////////////// // Write implements io.Writer; it does what you'd expect. @@ -233,8 +249,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { indentFmt: *enc, } specu.compactFmt.Compact = true - specu.compactFmt.Out = &specu.compactBuf - specu.indentFmt.Out = &specu.indentBuf + specu.compactFmt.out = &specu.compactBuf + specu.indentFmt.out = &specu.indentBuf enc.handleRuneState.specu = specu if err := specu.compactFmt.handleRuneMain(c, t); err != nil { return err @@ -262,12 +278,12 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { switch { case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent - if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.Out); err != nil { + if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil { return err } enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.Out); err != nil { + if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil { return err } enc.handleRuneState.lastNonSpace = t @@ -395,27 +411,27 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { switch t { case internal.RuneTypeStringChar: - return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) case internal.RuneTypeStringEsc, 
internal.RuneTypeStringEscU: return nil case internal.RuneTypeStringEsc1: switch c { case '"': - return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': - return enc.emit(writeStringChar(enc.Out, '\\', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': - return enc.emit(writeStringChar(enc.Out, '/', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': - return enc.emit(writeStringChar(enc.Out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': - return enc.emit(writeStringChar(enc.Out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': - return enc.emit(writeStringChar(enc.Out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': - return enc.emit(writeStringChar(enc.Out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': - return enc.emit(writeStringChar(enc.Out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } @@ -435,7 +451,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { rune(enc.handleRuneState.uhex[1])<<8 | rune(enc.handleRuneState.uhex[2])<<4 | rune(enc.handleRuneState.uhex[3])<<0 - return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + return 
enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -455,7 +471,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { } func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.Out, c) + err := writeByte(enc.out, c) if err == nil { enc.written++ } @@ -472,12 +488,12 @@ func (enc *ReEncoder) emitNlIndent() error { return err } if enc.Prefix != "" { - if err := enc.emit(io.WriteString(enc.Out, enc.Prefix)); err != nil { + if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil { return err } } for i := 0; i < enc.handleRuneState.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil { + if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil { return err } } diff --git a/reencode_test.go b/reencode_test.go index f4b962f..38f3f8f 100644 --- a/reencode_test.go +++ b/reencode_test.go @@ -14,13 +14,13 @@ import ( func TestReEncode(t *testing.T) { t.Parallel() type testcase struct { - enc ReEncoder + enc ReEncoderConfig in any exp string } testcases := map[string]testcase{ "basic": { - enc: ReEncoder{ + enc: ReEncoderConfig{ Indent: "\t", CompactIfUnder: 10, }, @@ -36,7 +36,7 @@ func TestReEncode(t *testing.T) { }`, }, "arrays1": { - enc: ReEncoder{ + enc: ReEncoderConfig{ Indent: "\t", CompactIfUnder: 10, ForceTrailingNewlines: true, @@ -64,7 +64,7 @@ func TestReEncode(t *testing.T) { `, }, "arrays2": { - enc: ReEncoder{ + enc: ReEncoderConfig{ Indent: "\t", CompactIfUnder: 10, ForceTrailingNewlines: true, @@ -92,7 +92,7 @@ func TestReEncode(t *testing.T) { `, }, "arrays3": { - enc: ReEncoder{ + enc: ReEncoderConfig{ Indent: "\t", ForceTrailingNewlines: true, }, @@ -126,9 +126,7 @@ func TestReEncode(t *testing.T) { t.Run(tcName, func(t *testing.T) { t.Parallel() var out strings.Builder - fmter := tc.enc - fmter.Out = &out - enc := NewEncoder(&fmter) + enc := 
NewEncoder(NewReEncoder(&out, tc.enc)) assert.NoError(t, enc.Encode(tc.in)) assert.Equal(t, tc.exp, out.String()) }) -- cgit v1.1-4-g5e80