From cbd5679fa554573506318deb62f5648dbffe027e Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Wed, 17 Aug 2022 19:08:15 -0600
Subject: reencode: Implement CompactIfUnder

---
Note: this copy of the patch was rebuilt from a whitespace-collapsed
rendering.  Indentation, alignment and blank lines follow gofmt
conventions and may not match the original byte-for-byte (apply with
`git apply --ignore-whitespace` if context fails to match).  Relative
to commit cbd5679 it also adds doc comments and fixes the
"affect"/"effect" typo; no Go statements were changed.

 reencode.go      | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 reencode_test.go | 36 +++++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 reencode_test.go

diff --git a/reencode.go b/reencode.go
index 8424168..46a8e48 100644
--- a/reencode.go
+++ b/reencode.go
@@ -5,6 +5,7 @@
 package lowmemjson
 
 import (
+	"bytes"
 	"fmt"
 	"io"
 	"unicode/utf8"
@@ -12,6 +13,18 @@ import (
 
 type reencodeState func(rune) error
 
+// speculation holds the two candidate renderings of an element while
+// handleRune has not yet decided whether that element is short enough
+// to be emitted compactly: one ReEncoder copy forced to Compact=true
+// and one left in indented mode, each writing to its own buffer.
+type speculation struct {
+	compactFmt ReEncoder
+	compactBuf bytes.Buffer
+	indentFmt  ReEncoder
+	indentBuf  bytes.Buffer
+}
+
+// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth).
 type ReEncoder struct {
 	Out io.Writer
 
@@ -25,6 +38,15 @@ type ReEncoder struct {
 	//
 	// Trims superflous 0s from numbers.
 	Compact bool
+	// CompactIfUnder causes the *ReEncoder to behave as if
+	// Compact=true for individual elements if doing so would
+	// cause that element to be under this number of bytes.
+	//
+	// Has no effect if Compact is true or Indent is empty.
+	//
+	// This has O((CompactIfUnder+1)^2) memory overhead, so set
+	// with caution.
+	CompactIfUnder int
 	// String to use to indent; ignored if Compact is true.
 	//
 	// Newlines are emitted *between* top-level values; a newline is
@@ -63,6 +85,8 @@ type ReEncoder struct {
 		uhex      [4]byte // "\uABCD"-encoded characters in strings
 		fracZeros int64
 		expZero   bool
+
+		specu *speculation // non-nil while a CompactIfUnder decision is pending
 	}
 }
 
@@ -165,6 +189,72 @@ rehandle:
 // internal ////////////////////////////////////////////////////////////////////
 
 func (enc *ReEncoder) handleRune(c rune, t RuneType) error {
+	// With CompactIfUnder in effect, the start of an object or
+	// array begins speculation: runes are fed to both a compact
+	// and an indented copy of the encoder, each writing to its
+	// own buffer.  Once the compact buffer reaches CompactIfUnder
+	// bytes the indented buffer is flushed and the indented
+	// copy's state is adopted; if the element ends first, the
+	// compact buffer is flushed instead.
+	if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" {
+		return enc.handleRuneNoSpeculation(c, t)
+	}
+
+	// main
+	if enc.handleRuneState.specu == nil { // not speculating
+		switch t {
+		case RuneTypeObjectBeg, RuneTypeArrayBeg: // start speculating
+			if err, _ := enc.handleRunePre(c, t); err != nil {
+				return err
+			}
+			specu := &speculation{
+				compactFmt: *enc,
+				indentFmt:  *enc,
+			}
+			specu.compactFmt.Compact = true
+			specu.compactFmt.Out = &specu.compactBuf
+			specu.indentFmt.Out = &specu.indentBuf
+			enc.handleRuneState.specu = specu
+			if err := specu.compactFmt.handleRuneMain(c, t); err != nil {
+				return err
+			}
+			if err := specu.indentFmt.handleRuneMain(c, t); err != nil {
+				return err
+			}
+		default:
+			if err := enc.handleRuneNoSpeculation(c, t); err != nil {
+				return err
+			}
+		}
+	} else { // speculating
+		if err := enc.handleRuneState.specu.compactFmt.handleRune(c, t); err != nil {
+			return err
+		}
+		if err := enc.handleRuneState.specu.indentFmt.handleRune(c, t); err != nil {
+			return err
+		}
+		switch {
+		case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent
+			if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.Out); err != nil {
+				return err
+			}
+			enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState
+		case t == RuneTypeObjectEnd || t == RuneTypeArrayEnd: // stop speculating; use compact
+			if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.Out); err != nil {
+				return err
+			}
+			enc.handleRuneState.lastNonSpace = t
+			enc.handleRuneState.curIndent--
+			enc.handleRuneState.specu = nil
+		}
+	}
+
+	return nil
+}
+
+// handleRuneNoSpeculation is the previous body of handleRune: it
+// handles a rune directly, with no CompactIfUnder buffering.
+func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t RuneType) error {
 	err, shouldHandle := enc.handleRunePre(c, t)
 	if err != nil {
 		return err
diff --git a/reencode_test.go b/reencode_test.go
new file mode 100644
index 0000000..5fe66db
--- /dev/null
+++ b/reencode_test.go
@@ -0,0 +1,36 @@
+// Copyright (C) 2022 Luke Shumaker
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestReEncodeCompactIfUnder(t *testing.T) {
+	var out strings.Builder
+	enc := NewEncoder(&ReEncoder{
+		Out:                 &out,
+		AllowMultipleValues: true,
+		Indent:              "\t",
+		CompactIfUnder:      10,
+	})
+
+	obj := map[string][]string{
+		"a": []string{"b", "c"},
+		"d": []string{"eeeeeeeeeeeeeee"},
+	}
+
+	assert.NoError(t, enc.Encode(obj))
+	exp := `{
+	"a": ["b","c"],
+	"d": [
+		"eeeeeeeeeeeeeee"
+	]
+}`
+	assert.Equal(t, exp, out.String())
+}
-- 
cgit v1.1-4-g5e80