From f68498a6fdb421483d9aebb45527452f6255bb68 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 16:17:01 -0700 Subject: jsonparse: Don't show raw bytes as Unicode --- compat/json/compat_test.go | 8 ++++---- compat/json/equiv_test.go | 29 ++++++++++++++++++++++++++++- compat/json/testcompat_test.go | 2 +- 3 files changed, 33 insertions(+), 6 deletions(-) (limited to 'compat/json') diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index cf9e359..2380c53 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -79,7 +79,7 @@ func TestCompatCompact(t *testing.T) { "hex-lower": {In: `"\uabcd"`, Out: `"\uabcd"`}, "hex-upper": {In: `"\uABCD"`, Out: `"\uABCD"`}, "hex-mixed": {In: `"\uAbCd"`, Out: `"\uAbCd"`}, - "invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`}, + "invalid-utf8": {In: "\x85", Err: `invalid character '\x85' looking for beginning of value`}, } for tcName, tc := range testcases { tc := tc @@ -120,7 +120,7 @@ func TestCompatIndent(t *testing.T) { "hex-lower": {In: `"\uabcd"`, Out: `"\uabcd"`}, "hex-upper": {In: `"\uABCD"`, Out: `"\uABCD"`}, "hex-mixed": {In: `"\uAbCd"`, Out: `"\uAbCd"`}, - "invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`}, + "invalid-utf8": {In: "\x85", Err: `invalid character '\x85' looking for beginning of value`}, } for tcName, tc := range testcases { tc := tc @@ -183,7 +183,7 @@ func TestCompatUnmarshal(t *testing.T) { "two-objs": {In: `{} {}`, ExpOut: nil, ExpErr: `invalid character '{' after top-level value`}, "two-numbers1": {In: `00`, ExpOut: nil, ExpErr: `invalid character '0' after top-level value`}, "two-numbers2": {In: `1 2`, ExpOut: nil, ExpErr: `invalid character '2' after top-level value`}, - "invalid-utf8": {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`}, + "invalid-utf8": {In: "\x85", ExpErr: `invalid character '\x85' looking for beginning of value`}, // 2e308 is slightly more than math.MaxFloat64 (~1.79e308) "obj-overflow": {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`}, "ary-overflow": {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`}, @@ -244,7 +244,7 @@ func TestCompatDecode(t *testing.T) { "two-objs": {In: `{} {}`, ExpOut: map[string]any{}}, "two-numbers1": {In: `00`, ExpOut: float64(0)}, "two-numbers2": {In: `1 2`, ExpOut: float64(1)}, - "invalid-utf8": {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`}, + "invalid-utf8": {In: "\x85", ExpErr: `invalid character '\x85' looking for beginning of value`}, // 2e308 is slightly more than math.MaxFloat64 (~1.79e308) "obj-overflow": {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`}, "ary-overflow": {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`}, diff --git a/compat/json/equiv_test.go b/compat/json/equiv_test.go index 246e4b3..cb02f43 100644 --- a/compat/json/equiv_test.go +++ b/compat/json/equiv_test.go @@ -44,8 +44,27 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) { lowByte := lowMsg[len(prefix)] if lowByte == '\\' { switch lowMsg[len(prefix)+1] { + case 'a': + lowByte = '\a' + case 'b': + lowByte = '\b' + case 'f': + lowByte = '\f' + case 'n': + lowByte = '\n' + case 'r': + lowByte = '\r' + case 't': + lowByte = '\t' + case 'v': + lowByte = '\v' + case '\\', '\'': + lowByte = lowMsg[len(prefix)+1] + case 'x': + lowByte64, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:2], 16, 8) + lowByte = byte(lowByte64) case 'u': - lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 32) + lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 16) var buf [4]byte utf8.EncodeRune(buf[:], rune(lowRune)) lowByte = buf[0] @@ -63,6 +82,14 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) { stdErr = errors.New(stdMsg) } } + + // I'd file a ticket for this, but @dsnet (one of the encoding/json maintainers) says that he's + // working on a parser-rewrite that would fix a bunch of this type of issue. + // https://github.com/golang/go/issues/58680#issuecomment-1444224084 + if strings.HasPrefix(stdMsg, `invalid character '\u00`) && strings.HasPrefix(lowMsg, `invalid character '\x`) { + stdMsg = `invalid character '\x` + strings.TrimPrefix(stdMsg, `invalid character '\u00`) + stdErr = errors.New(stdMsg) + } } // Text-equal. assert.Equal(t, stdErr.Error(), lowErr.Error()) diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index 73153d9..affcd7c 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -32,7 +32,7 @@ func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { func isValidNumber(s string) bool { var parser jsonparse.Parser for _, r := range s { - if t, _ := parser.HandleRune(r); !t.IsNumber() { + if t, _ := parser.HandleRune(r, true); !t.IsNumber() { return false } } -- cgit v1.2.3-2-g168b