From dfc67cecbd95344d296c31b537fa3ae8aec8c292 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 22:36:25 -0700 Subject: encode, reencode: Fix handling of invalid UTF-8 --- compat/json/compat.go | 5 ++++- compat/json/compat_test.go | 45 ++++++++++++++++++++++++++++++++++-------- compat/json/testcompat_test.go | 5 +++-- 3 files changed, 44 insertions(+), 11 deletions(-) (limited to 'compat') diff --git a/compat/json/compat.go b/compat/json/compat.go index 1cdbf0b..d326514 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -160,6 +160,7 @@ func Compact(dst *bytes.Buffer, src []byte) error { start := dst.Len() err := reencode(dst, src, lowmemjson.ReEncoderConfig{ Compact: true, + InvalidUTF8: lowmemjson.InvalidUTF8Preserve, BackslashEscape: lowmemjson.EscapePreserve, }) if err != nil { @@ -173,6 +174,7 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { err := reencode(dst, src, lowmemjson.ReEncoderConfig{ Indent: indent, Prefix: prefix, + InvalidUTF8: lowmemjson.InvalidUTF8Preserve, BackslashEscape: lowmemjson.EscapePreserve, }) if err != nil { @@ -183,7 +185,8 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { func Valid(data []byte) bool { formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{ - Compact: true, + Compact: true, + InvalidUTF8: lowmemjson.InvalidUTF8Error, }) if _, err := formatter.Write(data); err != nil { return false diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index d513c27..d989a4d 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -18,10 +18,11 @@ func TestCompatValid(t *testing.T) { Exp bool } testcases := map[string]testcase{ - "empty": {In: ``, Exp: false}, - "num": {In: `1`, Exp: true}, - "trunc": {In: `{`, Exp: false}, - "object": {In: `{}`, Exp: true}, + "empty": {In: ``, Exp: false}, + "num": {In: `1`, Exp: true}, + "trunc": {In: `{`, Exp: false}, + "object": {In: `{}`, Exp: true}, + "non-utf8": {In: "\"\x85\xcd\"", Exp: false}, // https://github.com/golang/go/issues/58517 } for tcName, tc := range testcases { tc := tc @@ -42,8 +43,9 @@ func TestCompatCompact(t *testing.T) { Err string } testcases := map[string]testcase{ - "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`}, - "object": {In: `{}`, Out: `{}`}, + "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`}, + "object": {In: `{}`, Out: `{}`}, + "non-utf8": {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""}, } for tcName, tc := range testcases { tc := tc @@ -70,8 +72,9 @@ func TestCompatIndent(t *testing.T) { Err string } testcases := map[string]testcase{ - "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`}, - "object": {In: `{}`, Out: `{}`}, + "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`}, + "object": {In: `{}`, Out: `{}`}, + "non-utf8": {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""}, } for tcName, tc := range testcases { tc := tc @@ -89,3 +92,29 @@ func TestCompatIndent(t *testing.T) { }) } } + +func TestCompatMarshal(t *testing.T) { + t.Parallel() + type testcase struct { + In any + Out string + Err string + } + testcases := map[string]testcase{ + "non-utf8": {In: "\x85\xcd", Out: "\"\\ufffd\\ufffd\""}, + "urc": {In: "\ufffd", Out: "\"\ufffd\""}, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + out, err := Marshal(tc.In) + assert.Equal(t, tc.Out, string(out)) + if tc.Err == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.Err) + } + }) + } +} diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index 42cbf5c..e89b4b4 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/json" "io" + "reflect" _ "unsafe" "git.lukeshu.com/go/lowmemjson" @@ -59,13 +60,13 @@ type encodeState struct { } func (es *encodeState) string(str string, _ bool) { - if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { + if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, 0, reflect.Value{}, str); err != nil { panic(err) } } func (es *encodeState) stringBytes(str []byte, _ bool) { - if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { + if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, 0, reflect.Value{}, str); err != nil { panic(err) } } -- cgit v1.2.3-2-g168b