From 494ad195bc31ce6a65f759544355801fe357c56d Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 17 Aug 2022 15:19:12 -0600 Subject: Add more tests around trailing-newlines from the encoder --- compat/json/compat.go | 11 +++------- encode.go | 42 +++++++++++++++++++++++++++++--------- encode_test.go | 52 +++++++++++++++++++++++++++++++++++++++++++++++ reencode.go | 56 +++++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 133 insertions(+), 28 deletions(-) create mode 100644 encode_test.go diff --git a/compat/json/compat.go b/compat/json/compat.go index 8dcb266..42dd807 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -71,7 +71,7 @@ func Marshal(v any) ([]byte, error) { } type Encoder struct { - encoder lowmemjson.Encoder + encoder *lowmemjson.Encoder formatter lowmemjson.ReEncoder } @@ -86,16 +86,12 @@ func NewEncoder(w io.Writer) *Encoder { ForceTrailingNewlines: true, }, } - ret.encoder.W = &ret.formatter + ret.encoder = lowmemjson.NewEncoder(&ret.formatter) return ret } func (enc *Encoder) Encode(v any) error { - err := enc.encoder.Encode(v) - if err == nil { - err = enc.formatter.Close() - } - return convertEncodeError(err) + return convertEncodeError(enc.encoder.Encode(v)) } func (enc *Encoder) SetEscapeHTML(on bool) { @@ -103,7 +99,6 @@ func (enc *Encoder) SetEscapeHTML(on bool) { if !on { escaper = lowmemjson.EscapeDefaultNonHTMLSafe } - enc.encoder.BackslashEscape = escaper enc.formatter.BackslashEscape = escaper } diff --git a/encode.go b/encode.go index a77d8aa..44fd985 100644 --- a/encode.go +++ b/encode.go @@ -40,8 +40,25 @@ func encodeWriteString(w io.Writer, str string) { } type Encoder struct { - W io.Writer - BackslashEscape BackslashEscaper + w *ReEncoder +} + +// NewEncoder returns a new encoder. +// +// If w is an *ReEncoder, then the inner backslash-escaping of +// double-encoded ",string" tagged string values obeys the +// *ReEncoder's BackslashEscape policy. 
+func NewEncoder(w io.Writer) *Encoder { + re, ok := w.(*ReEncoder) + if !ok { + re = &ReEncoder{ + Out: w, + AllowMultipleValues: true, + } + } + return &Encoder{ + w: re, + } } func (enc *Encoder) Encode(obj any) (err error) { @@ -54,16 +71,21 @@ func (enc *Encoder) Encode(obj any) (err error) { } } }() - encode(enc.W, reflect.ValueOf(obj), enc.BackslashEscape, false, 0, map[any]struct{}{}) - if f, ok := enc.W.(interface{ Flush() error }); ok { - return f.Flush() - } - return nil + encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}) + return enc.w.Close() } +// Encode encodes a value to w. +// +// If w is an *ReEncoder, then the inner backslash-escaping of +// double-encoded ",string" tagged string values obeys the +// *ReEncoder's BackslashEscape policy. +// +// Does not write a trailing newline or other whitespace; if you will +// be encoding multiple values to w, then you should instead use an +// *Encoder object, as some JSON values need whitespace between them. func Encode(w io.Writer, obj any) (err error) { - enc := &Encoder{W: w} - return enc.Encode(obj) + return NewEncoder(w).Encode(obj) } var ( @@ -94,6 +116,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool encodeWriteString(w, "null") return } + // Use a sub-ReEncoder to check that it's a full element. validator := &ReEncoder{Out: w, BackslashEscape: escaper} if err := obj.EncodeJSON(validator); err != nil { panic(encodeError{&EncodeMethodError{ @@ -127,6 +150,7 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool SourceFunc: "MarshalJSON", }}) } + // Use a sub-ReEncoder to check that it's a full element. 
validator := &ReEncoder{Out: w, BackslashEscape: escaper} if _, err := validator.Write(dat); err != nil { panic(encodeError{err}) diff --git a/encode_test.go b/encode_test.go new file mode 100644 index 0000000..06eadf7 --- /dev/null +++ b/encode_test.go @@ -0,0 +1,52 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEncoder(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&out) + + assert.NoError(t, enc.Encode(1)) + assert.NoError(t, enc.Encode(1)) + assert.NoError(t, enc.Encode(struct{}{})) + assert.NoError(t, enc.Encode(nil)) + assert.NoError(t, enc.Encode(1)) + assert.Equal(t, "1\n1{}null1", out.String()) +} + +func TestEncoderIndent(t *testing.T) { + var out strings.Builder + enc := NewEncoder(&ReEncoder{ + Out: &out, + AllowMultipleValues: true, + Indent: "\t", + }) + + assert.NoError(t, enc.Encode(1)) + assert.NoError(t, enc.Encode(1)) + assert.NoError(t, enc.Encode(struct{}{})) + assert.NoError(t, enc.Encode([]int{9})) + assert.NoError(t, enc.Encode(nil)) + assert.NoError(t, enc.Encode(1)) + assert.Equal(t, "1\n1\n{}\n[\n\t9\n]\nnull\n1", out.String()) +} + +func TestEncode(t *testing.T) { + var out strings.Builder + + assert.NoError(t, Encode(&out, 1)) + assert.NoError(t, Encode(&out, 1)) + assert.NoError(t, Encode(&out, struct{}{})) + assert.NoError(t, Encode(&out, nil)) + assert.NoError(t, Encode(&out, 1)) + assert.Equal(t, "11{}null1", out.String()) +} diff --git a/reencode.go b/reencode.go index bcb3932..4c62cfc 100644 --- a/reencode.go +++ b/reencode.go @@ -18,13 +18,24 @@ type ReEncoder struct { AllowMultipleValues bool // Whether to minify the JSON. + // + // Trims all whitespace, except that it emits a newline + // between two *number* top-level values (or puts a newline + // after all top-level values if ForceTrailingNewlines). + // + // Trims superfluous 0s from numbers. 
Compact bool // String to use to indent; ignored if Compact is true. + // + // Newlines are emitted *between* top-level values; a newline is + // not emitted after the *last* top-level value (unless + // ForceTrailingNewlines is on). Indent string // String to put before indents. Prefix string - // Whether to emit a newline after each top-level value, even - // if it could unambiguously be omitted. + // Whether to emit a newline after each top-level value. See + // the comments on Compact and Indent for discussion of how + // this is different than the usual behavior. ForceTrailingNewlines bool // Returns whether a given character in a string should be // backslash-escaped. The bool argument is whether it was @@ -46,6 +57,7 @@ type ReEncoder struct { // state: .handleRune lastNonSpace RuneType + wasNumber bool curIndent int uhex [4]byte // "\uABCD"-encoded characters in strings fracZeros int64 @@ -79,6 +91,10 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { return len(p), nil } +// Close does what you'd expect, mostly. +// +// The *ReEncoder may continue to be written to with new JSON values +// if enc.AllowMultipleValues is set. 
func (enc *ReEncoder) Close() error { if enc.bufLen > 0 { return &ReEncodeSyntaxError{ @@ -93,7 +109,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if err := enc.handleRune(0, 0); err != nil { + if err := enc.handleRune(0, RuneTypeError); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, @@ -159,6 +175,22 @@ func (enc *ReEncoder) handleRune(c rune, t RuneType) error { enc.lastNonSpace = t }() + // emit newlines between top-level values + if enc.lastNonSpace == RuneTypeEOF { + switch { + case enc.wasNumber && t.IsNumber(): + if err := enc.emitByte('\n'); err != nil { + return err + } + case enc.Indent != "" && !enc.Compact: + if err := enc.emitByte('\n'); err != nil { + return err + } + default: + // do nothing + } + } + // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits case RuneTypeNumberFracDot: @@ -270,16 +302,18 @@ func (enc *ReEncoder) handleRune(c rune, t RuneType) error { rune(enc.uhex[3])<<0 return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) - case RuneTypeEOF: // start of next top-level value - if !enc.ForceTrailingNewlines && (enc.Compact || enc.Indent == "") && !enc.lastNonSpace.IsNumber() { - return nil - } - return enc.emitByte('\n') - case RuneTypeError: // .Close() - if !enc.ForceTrailingNewlines { + case RuneTypeError: // EOF explicitly stated by .Close() + fallthrough + case RuneTypeEOF: // EOF implied by the start of the next top-level value + enc.wasNumber = enc.lastNonSpace.IsNumber() + switch { + case enc.ForceTrailingNewlines: + t = RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) + return enc.emitByte('\n') + default: + t = RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed return nil } - return enc.emitByte('\n') default: return enc.emitByte(byte(c)) } -- cgit v1.2.3-2-g168b