From 237e6ed6c125658be232c259b8b7689c5b7ef966 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 24 Feb 2023 20:51:58 -0700 Subject: encode: Sort maps by the pre-string-encoded key --- ReleaseNotes.md | 10 +++++ compat/json/compat_test.go | 1 + .../json/testdata/fuzz/FuzzEquiv/8a7cd8b26dc500eb | 2 + encode.go | 45 +++++++++++++++------- 4 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/8a7cd8b26dc500eb diff --git a/ReleaseNotes.md b/ReleaseNotes.md index ae111f8..5180e93 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,3 +1,13 @@ +# v0.3.8 (2023-02-25) + + Theme: Fixes from fuzzing (part 3/?) + + User-facing changes: + + - Change: Encoder: When encoding a `map`, sort the key:value pairs + by the pre-string-encoded key rather than the post-string-encoded + key. For instance, now `""` sorts before `" "`. + # v0.3.8 (2023-02-25) Theme: Fixes from fuzzing (part 2/?) diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index af92093..52ed60e 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -154,6 +154,7 @@ func TestCompatMarshal(t *testing.T) { "non-utf8": {In: "\x85\xcd", Out: "\"\\ufffd\\ufffd\""}, "urc": {In: "\ufffd", Out: "\"\ufffd\""}, "float": {In: 1.2e3, Out: `1200`}, + "obj": {In: map[string]any{"": 1, " ": 2}, Out: `{"":1," ":2}`}, } for tcName, tc := range testcases { tc := tc diff --git a/compat/json/testdata/fuzz/FuzzEquiv/8a7cd8b26dc500eb b/compat/json/testdata/fuzz/FuzzEquiv/8a7cd8b26dc500eb new file mode 100644 index 0000000..bb896c9 --- /dev/null +++ b/compat/json/testdata/fuzz/FuzzEquiv/8a7cd8b26dc500eb @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("{\" \":{},\"\":0}") diff --git a/encode.go b/encode.go index 684cc75..f8c6915 100644 --- a/encode.go +++ b/encode.go @@ -385,34 +385,51 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, utf Inval return err } + var kBuf strings.Builder + kEnc := NewReEncoder(&kBuf, ReEncoderConfig{ + AllowMultipleValues: true, + + Compact: true, + + BackslashEscape: escaper, + InvalidUTF8: utf, + }) + type kv struct { - K string - V reflect.Value + KStr string + K reflect.Value + V reflect.Value } kvs := make([]kv, val.Len()) iter := val.MapRange() for i := 0; iter.Next(); i++ { - // TODO: Avoid buffering the map key - var k strings.Builder - if err := encode(NewReEncoder(&k, ReEncoderConfig{BackslashEscape: escaper, InvalidUTF8: utf}), iter.Key(), escaper, utf, false, cycleDepth, cycleSeen); err != nil { + if err := encode(kEnc, iter.Key(), escaper, utf, false, cycleDepth, cycleSeen); err != nil { return err } - kStr := k.String() + if err := kEnc.Close(); err != nil { + return err + } + kStr := strings.Trim(kBuf.String(), "\n") + kBuf.Reset() if kStr == "null" { - kStr = `""` + kStr = "" } - if !strings.HasPrefix(kStr, `"`) { - k.Reset() - if err := jsonstring.EncodeStringFromString(&k, escaper, utf, iter.Key(), kStr); err != nil { + + // TODO(lukeshu): Have kEnc look at the first byte, and feed directly to a decoder, + // instead of needing to buffer the whole thing twice. + if strings.HasPrefix(kStr, `"`) { + if err := DecodeString(strings.NewReader(kStr), &kBuf); err != nil { return err } - kStr = k.String() + kStr = kBuf.String() + kBuf.Reset() } - kvs[i].K = kStr + kvs[i].KStr = kStr + kvs[i].K = iter.Key() kvs[i].V = iter.Value() } sort.Slice(kvs, func(i, j int) bool { - return kvs[i].K < kvs[j].K + return kvs[i].KStr < kvs[j].KStr }) for i, kv := range kvs { @@ -421,7 +438,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, utf Inval return err } } - if _, err := w.WriteString(kv.K); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, utf, kv.K, kv.KStr); err != nil { return err } if err := w.WriteByte(':'); err != nil { -- cgit v1.2.3-2-g168b