summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-01-30 21:58:51 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-01-30 21:58:51 -0700
commitab321d3f90b9a1b4c00b04be26867d9a03809259 (patch)
tree7f4a64fa5365ffd3b285f921c5f905754b3883b3
parentd473f861a5c3a3112c83518eafbcda50e274b182 (diff)
parentcbf8ec9ae3212e9642385c034fe0b0846af6dfd0 (diff)
Merge branch 'lukeshu/break'
-rw-r--r--README.md21
-rw-r--r--ReleaseNotes.md22
-rw-r--r--compat/json/compat.go38
-rw-r--r--compat/json/compat_test.go7
-rw-r--r--encode.go9
-rw-r--r--encode_test.go5
-rw-r--r--reencode.go96
-rw-r--r--reencode_test.go14
8 files changed, 113 insertions, 99 deletions
diff --git a/README.md b/README.md
index fcb46fa..108f3dc 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,8 @@ those types should decode identically with `lowmemjson`. Given types
that encode as desired with `encoding/json`, those types should encode
identically with `lowmemjson` (assuming an appropriately configured
`ReEncoder` to match the whitespace-handling and special-character
-escaping; a `ReEncoder` with `Compact=true` and all other settings
-left as zero will match the behavior of `json.Marshal`).
+escaping; a `ReEncoderConfig` with `Compact=true` and all other
+settings left as zero will match the behavior of `json.Marshal`).
For better memory usage:
- Instead of implementing [`json.Marshaler`][], consider implementing
@@ -95,6 +95,7 @@ types that go with it:
+ `type EncodeMethodError`
3. `type ReEncoder`
+ + `type ReEncoderConfig`
+ `type ReEncodeSyntaxError`
+ `type BackslashEscaper`
* `type BackslashEscapeMode`
@@ -108,25 +109,13 @@ A `*ReEncoder` handles transforming a JSON stream; this is useful for
prettifying, minifying, sanitizing, and/or validating JSON. A
`*ReEncoder` wraps an `io.Writer`, itself implementing `io.Writer`.
The most common use of it will be something along the lines of
-
-```go
-out = &ReEncoder{
- Out: out,
- // settings here
-}
-```
+`out = lowmemjson.NewReEncoder(out, lowmemjson.ReEncoderConfig{…})`.
An `*Encoder` handles encoding Go values into a JSON stream.
`*Encoder` doesn't take much care in making its output nice, so it
is usually desirable to have the output stream of an `*Encoder` be a `*ReEncoder`; the most
common use of it will be
-
-```go
-lowmemjson.NewEncoder(&lowmemjson.ReEncoder{
- Out: out,
- // settings here
-}).Encode(val)
-```
+`lowmemjson.NewEncoder(lowmemjson.NewReEncoder(out, lowmemjson.ReEncoderConfig{…})).Encode(val)`.
`*Encoder` and `*ReEncoder` both tend to make many small writes; if
writes are syscalls, you may want to wrap their output in a
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index e4327db..874aeec 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -1,3 +1,25 @@
+# v0.3.0 (TBD)
+
+ Theme: Breaking changes
+
+ This release makes a breaking change to the way *ReEncoder works.
+ This change both better fits what's convenient to use, and enables
+ making future performance improvements.
+
+ Breaking changes:
+
+ - ReEncoder: Instead of instantiating a `*ReEncoder` with
+
+ ```go
+ reenc := &lowmemjson.ReEncoder{Out: w, settings}
+ ```
+
+ it is now instantiated with
+
+ ```go
+ reenc := lowmemjson.NewReEncoder(w, lowmemjson.ReEncoderConfig{settings})
+ ```
+
# v0.2.1 (2023-01-30)
Theme: Code quality
diff --git a/compat/json/compat.go b/compat/json/compat.go
index 48d708b..688b35c 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -50,45 +50,42 @@ func convertEncodeError(err error) error {
return err
}
-func marshal(v any, formatter *lowmemjson.ReEncoder) ([]byte, error) {
+func marshal(v any, cfg lowmemjson.ReEncoderConfig) ([]byte, error) {
var buf bytes.Buffer
- formatter.Out = &buf
- if err := convertEncodeError(lowmemjson.NewEncoder(formatter).Encode(v)); err != nil {
+ if err := convertEncodeError(lowmemjson.NewEncoder(lowmemjson.NewReEncoder(&buf, cfg)).Encode(v)); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
func MarshalIndent(v any, prefix, indent string) ([]byte, error) {
- return marshal(v, &lowmemjson.ReEncoder{
+ return marshal(v, lowmemjson.ReEncoderConfig{
Indent: indent,
Prefix: prefix,
})
}
func Marshal(v any) ([]byte, error) {
- return marshal(v, &lowmemjson.ReEncoder{
+ return marshal(v, lowmemjson.ReEncoderConfig{
Compact: true,
})
}
type Encoder struct {
encoder *lowmemjson.Encoder
- formatter lowmemjson.ReEncoder
+ formatter *lowmemjson.ReEncoder
}
func NewEncoder(w io.Writer) *Encoder {
ret := &Encoder{
- formatter: lowmemjson.ReEncoder{
- Out: w,
-
+ formatter: lowmemjson.NewReEncoder(w, lowmemjson.ReEncoderConfig{
AllowMultipleValues: true,
Compact: true,
ForceTrailingNewlines: true,
- },
+ }),
}
- ret.encoder = lowmemjson.NewEncoder(&ret.formatter)
+ ret.encoder = lowmemjson.NewEncoder(ret.formatter)
return ret
}
@@ -126,13 +123,11 @@ func convertReEncodeError(err error) error {
}
func HTMLEscape(dst *bytes.Buffer, src []byte) {
- formatter := &lowmemjson.ReEncoder{
- Out: dst,
- }
- _, _ = formatter.Write(src)
+ _, _ = lowmemjson.NewReEncoder(dst, lowmemjson.ReEncoderConfig{}).Write(src)
}
-func reencode(src []byte, formatter *lowmemjson.ReEncoder) error {
+func reencode(dst io.Writer, src []byte, cfg lowmemjson.ReEncoderConfig) error {
+ formatter := lowmemjson.NewReEncoder(dst, cfg)
_, err := formatter.Write(src)
if err == nil {
err = formatter.Close()
@@ -141,16 +136,14 @@ func reencode(src []byte, formatter *lowmemjson.ReEncoder) error {
}
func Compact(dst *bytes.Buffer, src []byte) error {
- return reencode(src, &lowmemjson.ReEncoder{
- Out: dst,
+ return reencode(dst, src, lowmemjson.ReEncoderConfig{
Compact: true,
BackslashEscape: lowmemjson.EscapePreserve,
})
}
func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
- return reencode(src, &lowmemjson.ReEncoder{
- Out: dst,
+ return reencode(dst, src, lowmemjson.ReEncoderConfig{
Indent: indent,
Prefix: prefix,
BackslashEscape: lowmemjson.EscapePreserve,
@@ -158,10 +151,9 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
}
func Valid(data []byte) bool {
- formatter := &lowmemjson.ReEncoder{
- Out: io.Discard,
+ formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{
Compact: true,
- }
+ })
_, err := formatter.Write(data)
return err == nil
}
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index dac5e54..feb850b 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -14,11 +14,10 @@ import (
var parseTag = internal.ParseTag
-type scanner = lowmemjson.ReEncoder
+type scanner = lowmemjson.ReEncoderConfig
-func checkValid(in []byte, scan *lowmemjson.ReEncoder) error {
- scan.Out = io.Discard
- return reencode(in, scan)
+func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error {
+ return reencode(io.Discard, in, *scan)
}
func isValidNumber(s string) bool {
diff --git a/encode.go b/encode.go
index 00848ed..e9c7ac6 100644
--- a/encode.go
+++ b/encode.go
@@ -71,10 +71,9 @@ type Encoder struct {
func NewEncoder(w io.Writer) *Encoder {
re, ok := w.(*ReEncoder)
if !ok {
- re = &ReEncoder{
- Out: w,
+ re = NewReEncoder(w, ReEncoderConfig{
AllowMultipleValues: true,
- }
+ })
}
return &Encoder{
w: re,
@@ -137,7 +136,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool
return
}
// Use a sub-ReEncoder to check that it's a full element.
- validator := &ReEncoder{Out: w, BackslashEscape: escaper}
+ validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper})
if err := obj.EncodeJSON(validator); err != nil {
panic(encodeError{&EncodeMethodError{
Type: val.Type(),
@@ -175,7 +174,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool
}})
}
// Use a sub-ReEncoder to check that it's a full element.
- validator := &ReEncoder{Out: w, BackslashEscape: escaper}
+ validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper})
if _, err := validator.Write(dat); err != nil {
panic(encodeError{&EncodeMethodError{
Type: val.Type(),
diff --git a/encode_test.go b/encode_test.go
index df12582..edd8af1 100644
--- a/encode_test.go
+++ b/encode_test.go
@@ -27,11 +27,10 @@ func TestEncoder(t *testing.T) {
func TestEncoderIndent(t *testing.T) {
t.Parallel()
var out strings.Builder
- enc := NewEncoder(&ReEncoder{
- Out: &out,
+ enc := NewEncoder(NewReEncoder(&out, ReEncoderConfig{
AllowMultipleValues: true,
Indent: "\t",
- })
+ }))
assert.NoError(t, enc.Encode(1))
assert.NoError(t, enc.Encode(2))
diff --git a/reencode.go b/reencode.go
index 6b9c336..876af62 100644
--- a/reencode.go
+++ b/reencode.go
@@ -13,29 +13,8 @@ import (
"git.lukeshu.com/go/lowmemjson/internal"
)
-type speculation struct {
- compactFmt ReEncoder
- compactBuf bytes.Buffer
- indentFmt ReEncoder
- indentBuf bytes.Buffer
-}
-
-// A ReEncoder takes a stream of JSON elements (by way of implementing
-// io.Writer and WriteRune), and re-encodes the JSON, writing it to
-// the .Out member.
-//
-// This is useful for prettifying, minifying, sanitizing, and/or
-// validating JSON.
-//
-// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth).
-type ReEncoder struct {
- // The output stream to write the re-encoded JSON to.
- //
- // A ReEncoder tends to make many small writes; if Out.Write
- // calls are syscalls, then you may want to wrap Out in a
- // bufio.Writer.
- Out io.Writer
-
+// A ReEncoderConfig controls how a ReEncoder should behave.
+type ReEncoderConfig struct {
// A JSON document is specified to be a single JSON element;
// but it is often desirable to handle streams of multiple
// JSON elements.
@@ -49,6 +28,7 @@ type ReEncoder struct {
//
// Trims superfluous 0s from numbers.
Compact bool
+
// CompactIfUnder causes the *ReEncoder to behave as if
// Compact=true for individual elements if doing so would
// cause that element to be under this number of bytes.
@@ -58,18 +38,22 @@ type ReEncoder struct {
// This has O((CompactIfUnder+1)^2) memory overhead, so set
// with caution.
CompactIfUnder int
+
// String to use to indent; ignored if Compact is true.
//
// Newlines are emitted *between* top-level values; a newline is
// not emitted after the *last* top-level value (unless
// ForceTrailingNewlines is on).
Indent string
+
// String to put before indents.
Prefix string
+
// Whether to emit a newline after each top-level value. See
// the comments on Compact and Indent for discussion of how
// this is different than the usual behavior.
ForceTrailingNewlines bool
+
// Returns whether a given character in a string should be
// backslash-escaped. The bool argument is whether it was
// \u-escaped in the input. This does not affect characters
@@ -77,6 +61,31 @@ type ReEncoder struct {
//
// If not set, then EscapeDefault is used.
BackslashEscape BackslashEscaper
+}
+
+// NewReEncoder returns a new ReEncoder instance.
+//
+// A ReEncoder tends to make many small writes; if out.Write
+// calls are syscalls, then you may want to wrap out in a
+// bufio.Writer.
+func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
+ return &ReEncoder{
+ ReEncoderConfig: cfg,
+ out: out,
+ }
+}
+
+// A ReEncoder takes a stream of JSON elements (by way of implementing
+// io.Writer and WriteRune), and re-encodes the JSON, writing it to
+// the io.Writer that was passed to NewReEncoder.
+//
+// This is useful for prettifying, minifying, sanitizing, and/or
+// validating JSON.
+//
+// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth).
+type ReEncoder struct {
+ ReEncoderConfig
+ out io.Writer
// state: .Write's utf8-decoding buffer
buf [utf8.UTFMax]byte
@@ -101,6 +110,13 @@ type ReEncoder struct {
}
}
+type speculation struct {
+ compactFmt ReEncoder
+ compactBuf bytes.Buffer
+ indentFmt ReEncoder
+ indentBuf bytes.Buffer
+}
+
// public API //////////////////////////////////////////////////////////////////
// Write implements io.Writer; it does what you'd expect.
@@ -233,8 +249,8 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error {
indentFmt: *enc,
}
specu.compactFmt.Compact = true
- specu.compactFmt.Out = &specu.compactBuf
- specu.indentFmt.Out = &specu.indentBuf
+ specu.compactFmt.out = &specu.compactBuf
+ specu.indentFmt.out = &specu.indentBuf
enc.handleRuneState.specu = specu
if err := specu.compactFmt.handleRuneMain(c, t); err != nil {
return err
@@ -262,12 +278,12 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error {
switch {
case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent
- if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.Out); err != nil {
+ if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil {
return err
}
enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState
case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact
- if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.Out); err != nil {
+ if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil {
return err
}
enc.handleRuneState.lastNonSpace = t
@@ -395,27 +411,27 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error {
switch t {
case internal.RuneTypeStringChar:
- return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape))
case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU:
return nil
case internal.RuneTypeStringEsc1:
switch c {
case '"':
- return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape))
case '\\':
- return enc.emit(writeStringChar(enc.Out, '\\', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape))
case '/':
- return enc.emit(writeStringChar(enc.Out, '/', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape))
case 'b':
- return enc.emit(writeStringChar(enc.Out, '\b', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape))
case 'f':
- return enc.emit(writeStringChar(enc.Out, '\f', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape))
case 'n':
- return enc.emit(writeStringChar(enc.Out, '\n', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape))
case 'r':
- return enc.emit(writeStringChar(enc.Out, '\r', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape))
case 't':
- return enc.emit(writeStringChar(enc.Out, '\t', BackslashEscapeShort, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape))
default:
panic("should not happen")
}
@@ -435,7 +451,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error {
rune(enc.handleRuneState.uhex[1])<<8 |
rune(enc.handleRuneState.uhex[2])<<4 |
rune(enc.handleRuneState.uhex[3])<<0
- return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape))
+ return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape))
case internal.RuneTypeError: // EOF explicitly stated by .Close()
fallthrough
@@ -455,7 +471,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error {
}
func (enc *ReEncoder) emitByte(c byte) error {
- err := writeByte(enc.Out, c)
+ err := writeByte(enc.out, c)
if err == nil {
enc.written++
}
@@ -472,12 +488,12 @@ func (enc *ReEncoder) emitNlIndent() error {
return err
}
if enc.Prefix != "" {
- if err := enc.emit(io.WriteString(enc.Out, enc.Prefix)); err != nil {
+ if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil {
return err
}
}
for i := 0; i < enc.handleRuneState.curIndent; i++ {
- if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil {
+ if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil {
return err
}
}
diff --git a/reencode_test.go b/reencode_test.go
index f4b962f..38f3f8f 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -14,13 +14,13 @@ import (
func TestReEncode(t *testing.T) {
t.Parallel()
type testcase struct {
- enc ReEncoder
+ enc ReEncoderConfig
in any
exp string
}
testcases := map[string]testcase{
"basic": {
- enc: ReEncoder{
+ enc: ReEncoderConfig{
Indent: "\t",
CompactIfUnder: 10,
},
@@ -36,7 +36,7 @@ func TestReEncode(t *testing.T) {
}`,
},
"arrays1": {
- enc: ReEncoder{
+ enc: ReEncoderConfig{
Indent: "\t",
CompactIfUnder: 10,
ForceTrailingNewlines: true,
@@ -64,7 +64,7 @@ func TestReEncode(t *testing.T) {
`,
},
"arrays2": {
- enc: ReEncoder{
+ enc: ReEncoderConfig{
Indent: "\t",
CompactIfUnder: 10,
ForceTrailingNewlines: true,
@@ -92,7 +92,7 @@ func TestReEncode(t *testing.T) {
`,
},
"arrays3": {
- enc: ReEncoder{
+ enc: ReEncoderConfig{
Indent: "\t",
ForceTrailingNewlines: true,
},
@@ -126,9 +126,7 @@ func TestReEncode(t *testing.T) {
t.Run(tcName, func(t *testing.T) {
t.Parallel()
var out strings.Builder
- fmter := tc.enc
- fmter.Out = &out
- enc := NewEncoder(&fmter)
+ enc := NewEncoder(NewReEncoder(&out, tc.enc))
assert.NoError(t, enc.Encode(tc.in))
assert.Equal(t, tc.exp, out.String())
})