summary | refs | log | tree | commit | diff
path: root/compat
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-14 22:36:25 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-18 22:45:54 -0700
commit dfc67cecbd95344d296c31b537fa3ae8aec8c292 (patch)
tree 1e2e820cbd288d1ebef7b0e9dea14a07e2f33fc5 /compat
parent 38989a9c4f69abfe04c3eb4ec3382be88802141c (diff)
encode, reencode: Fix handling of invalid UTF-8
Diffstat (limited to 'compat')
-rw-r--r-- compat/json/compat.go 5
-rw-r--r-- compat/json/compat_test.go 45
-rw-r--r-- compat/json/testcompat_test.go 5
3 files changed, 44 insertions, 11 deletions
diff --git a/compat/json/compat.go b/compat/json/compat.go
index 1cdbf0b..d326514 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -160,6 +160,7 @@ func Compact(dst *bytes.Buffer, src []byte) error {
start := dst.Len()
err := reencode(dst, src, lowmemjson.ReEncoderConfig{
Compact: true,
+ InvalidUTF8: lowmemjson.InvalidUTF8Preserve,
BackslashEscape: lowmemjson.EscapePreserve,
})
if err != nil {
@@ -173,6 +174,7 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
err := reencode(dst, src, lowmemjson.ReEncoderConfig{
Indent: indent,
Prefix: prefix,
+ InvalidUTF8: lowmemjson.InvalidUTF8Preserve,
BackslashEscape: lowmemjson.EscapePreserve,
})
if err != nil {
@@ -183,7 +185,8 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
func Valid(data []byte) bool {
formatter := lowmemjson.NewReEncoder(io.Discard, lowmemjson.ReEncoderConfig{
- Compact: true,
+ Compact: true,
+ InvalidUTF8: lowmemjson.InvalidUTF8Error,
})
if _, err := formatter.Write(data); err != nil {
return false
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index d513c27..d989a4d 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -18,10 +18,11 @@ func TestCompatValid(t *testing.T) {
Exp bool
}
testcases := map[string]testcase{
- "empty": {In: ``, Exp: false},
- "num": {In: `1`, Exp: true},
- "trunc": {In: `{`, Exp: false},
- "object": {In: `{}`, Exp: true},
+ "empty": {In: ``, Exp: false},
+ "num": {In: `1`, Exp: true},
+ "trunc": {In: `{`, Exp: false},
+ "object": {In: `{}`, Exp: true},
+ "non-utf8": {In: "\"\x85\xcd\"", Exp: false}, // https://github.com/golang/go/issues/58517
}
for tcName, tc := range testcases {
tc := tc
@@ -42,8 +43,9 @@ func TestCompatCompact(t *testing.T) {
Err string
}
testcases := map[string]testcase{
- "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
- "object": {In: `{}`, Out: `{}`},
+ "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
+ "object": {In: `{}`, Out: `{}`},
+ "non-utf8": {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
}
for tcName, tc := range testcases {
tc := tc
@@ -70,8 +72,9 @@ func TestCompatIndent(t *testing.T) {
Err string
}
testcases := map[string]testcase{
- "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
- "object": {In: `{}`, Out: `{}`},
+ "trunc": {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
+ "object": {In: `{}`, Out: `{}`},
+ "non-utf8": {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
}
for tcName, tc := range testcases {
tc := tc
@@ -89,3 +92,29 @@ func TestCompatIndent(t *testing.T) {
})
}
}
+
+func TestCompatMarshal(t *testing.T) {
+ t.Parallel()
+ type testcase struct {
+ In any
+ Out string
+ Err string
+ }
+ testcases := map[string]testcase{
+ "non-utf8": {In: "\x85\xcd", Out: "\"\\ufffd\\ufffd\""},
+ "urc": {In: "\ufffd", Out: "\"\ufffd\""},
+ }
+ for tcName, tc := range testcases {
+ tc := tc
+ t.Run(tcName, func(t *testing.T) {
+ t.Parallel()
+ out, err := Marshal(tc.In)
+ assert.Equal(t, tc.Out, string(out))
+ if tc.Err == "" {
+ assert.NoError(t, err)
+ } else {
+ assert.EqualError(t, err, tc.Err)
+ }
+ })
+ }
+}
diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go
index 42cbf5c..e89b4b4 100644
--- a/compat/json/testcompat_test.go
+++ b/compat/json/testcompat_test.go
@@ -8,6 +8,7 @@ import (
"bytes"
"encoding/json"
"io"
+ "reflect"
_ "unsafe"
"git.lukeshu.com/go/lowmemjson"
@@ -59,13 +60,13 @@ type encodeState struct {
}
func (es *encodeState) string(str string, _ bool) {
- if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil {
+ if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, 0, reflect.Value{}, str); err != nil {
panic(err)
}
}
func (es *encodeState) stringBytes(str []byte, _ bool) {
- if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil {
+ if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, 0, reflect.Value{}, str); err != nil {
panic(err)
}
}