From d35495540df2b6d3ba16c84ce21627d9dbae000c Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Fri, 10 Feb 2023 23:38:26 -0700
Subject: Fuzz for equivalence between stdlib and lowmemjson

---
 compat/json/equiv_test.go | 160 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 compat/json/equiv_test.go

(limited to 'compat/json/equiv_test.go')

diff --git a/compat/json/equiv_test.go b/compat/json/equiv_test.go
new file mode 100644
index 0000000..246e4b3
--- /dev/null
+++ b/compat/json/equiv_test.go
@@ -0,0 +1,160 @@
+// Copyright (C) 2023 Luke Shumaker
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package json_test
+
+import (
+	"bytes"
+	std "encoding/json"
+	"errors"
+	"io"
+	"strconv"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"github.com/stretchr/testify/assert"
+
+	low "git.lukeshu.com/go/lowmemjson/compat/json"
+)
+
+func assertEquivErr(t *testing.T, stdErr, lowErr error) {
+	if (stdErr == nil) || (lowErr == nil) {
+		// Nil-equal.
+		assert.Equal(t, stdErr, lowErr)
+		return
+	}
+	switch stdErr.(type) {
+	case *std.SyntaxError:
+		if lowErr != nil {
+			stdMsg := stdErr.Error()
+			lowMsg := lowErr.Error()
+
+			// https://github.com/golang/go/issues/58680
+			if strings.HasPrefix(stdMsg, `invalid character ' ' `) &&
+				(errors.Is(lowErr, io.ErrUnexpectedEOF) || lowMsg == "unexpected end of JSON input") {
+				return
+			}
+
+			// https://github.com/golang/go/issues/58713
+			prefix := `invalid character '`
+			if stdMsg != lowMsg && strings.HasPrefix(stdMsg, prefix) && strings.HasPrefix(lowMsg, prefix) {
+				stdRune, stdRuneSize := utf8.DecodeRuneInString(stdMsg[len(prefix):])
+				lowByte := lowMsg[len(prefix)]
+				if lowByte == '\\' {
+					switch lowMsg[len(prefix)+1] {
+					case 'u':
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 32)
+						var buf [4]byte
+						utf8.EncodeRune(buf[:], rune(lowRune))
+						lowByte = buf[0]
+					case 'U':
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:8], 16, 32)
+						var buf [4]byte
+						utf8.EncodeRune(buf[:], rune(lowRune))
+						lowByte = buf[0]
+					}
+				}
+				if stdRune == rune(lowByte) {
+					lowRuneStr := lowMsg[len(prefix):]
+					lowRuneStr = lowRuneStr[:strings.IndexByte(lowRuneStr, '\'')]
+					stdMsg = prefix + lowRuneStr + stdMsg[len(prefix)+stdRuneSize:]
+					stdErr = errors.New(stdMsg)
+				}
+			}
+		}
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal (but be permissive of these not being type aliases).
+	case *std.MarshalerError:
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal (but be permissive of these not being type aliases).
+	default:
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal.
+	}
+}
+
+func FuzzEquiv(f *testing.F) {
+	f.Fuzz(func(t *testing.T, str []byte) {
+		t.Logf("str=%q", str)
+		t.Run("HTMLEscape", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			std.HTMLEscape(&stdOut, str)
+
+			var lowOut bytes.Buffer
+			low.HTMLEscape(&lowOut, str)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+		})
+		t.Run("Compact", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			stdErr := std.Compact(&stdOut, str)
+
+			var lowOut bytes.Buffer
+			lowErr := low.Compact(&lowOut, str)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Indent", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			stdErr := std.Indent(&stdOut, str, "»", "\t")
+
+			var lowOut bytes.Buffer
+			lowErr := low.Indent(&lowOut, str, "»", "\t")
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Valid", func(t *testing.T) {
+			stdValid := std.Valid(str) && utf8.Valid(str) // https://github.com/golang/go/issues/58517
+			lowValid := low.Valid(str)
+			assert.Equal(t, stdValid, lowValid)
+		})
+		t.Run("Decode-Encode", func(t *testing.T) {
+			var stdObj any
+			stdErr := std.NewDecoder(bytes.NewReader(str)).Decode(&stdObj)
+
+			var lowObj any
+			lowErr := low.NewDecoder(bytes.NewReader(str)).Decode(&lowObj)
+
+			assert.Equal(t, stdObj, lowObj)
+			assertEquivErr(t, stdErr, lowErr)
+			if t.Failed() {
+				return
+			}
+
+			var stdOut bytes.Buffer
+			stdErr = std.NewEncoder(&stdOut).Encode(stdObj)
+
+			var lowOut bytes.Buffer
+			lowErr = low.NewEncoder(&lowOut).Encode(lowObj)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Unmarshal-Marshal", func(t *testing.T) {
+			var stdObj any
+			stdErr := std.Unmarshal(str, &stdObj)
+
+			var lowObj any
+			lowErr := low.Unmarshal(str, &lowObj)
+
+			assert.Equal(t, stdObj, lowObj)
+			assertEquivErr(t, stdErr, lowErr)
+			if t.Failed() {
+				return
+			}
+
+			stdOut, stdErr := std.Marshal(stdObj)
+			lowOut, lowErr := low.Marshal(lowObj)
+
+			assert.Equal(t, string(stdOut), string(lowOut))
+			assertEquivErr(t, stdErr, lowErr)
+		})
+	})
+}
-- 
cgit v1.2.3-2-g168b

From f68498a6fdb421483d9aebb45527452f6255bb68 Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Sat, 25 Feb 2023 16:17:01 -0700
Subject: jsonparse: Don't show raw bytes as Unicode

---
 compat/json/equiv_test.go | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'compat/json/equiv_test.go')

diff --git a/compat/json/equiv_test.go b/compat/json/equiv_test.go
index 246e4b3..cb02f43 100644
--- a/compat/json/equiv_test.go
+++ b/compat/json/equiv_test.go
@@ -44,8 +44,27 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) {
 				lowByte := lowMsg[len(prefix)]
 				if lowByte == '\\' {
 					switch lowMsg[len(prefix)+1] {
+					case 'a':
+						lowByte = '\a'
+					case 'b':
+						lowByte = '\b'
+					case 'f':
+						lowByte = '\f'
+					case 'n':
+						lowByte = '\n'
+					case 'r':
+						lowByte = '\r'
+					case 't':
+						lowByte = '\t'
+					case 'v':
+						lowByte = '\v'
+					case '\\', '\'':
+						lowByte = lowMsg[len(prefix)+1]
+					case 'x':
+						lowByte64, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:2], 16, 8)
+						lowByte = byte(lowByte64)
 					case 'u':
-						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 32)
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 16)
 						var buf [4]byte
 						utf8.EncodeRune(buf[:], rune(lowRune))
 						lowByte = buf[0]
@@ -63,6 +82,14 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) {
 					stdErr = errors.New(stdMsg)
 				}
 			}
+
+			// I'd file a ticket for this, but @dsnet (one of the encoding/json maintainers) says that he's
+			// working on a parser-rewrite that would fix a bunch of this type of issue.
+			// https://github.com/golang/go/issues/58680#issuecomment-1444224084
+			if strings.HasPrefix(stdMsg, `invalid character '\u00`) && strings.HasPrefix(lowMsg, `invalid character '\x`) {
+				stdMsg = `invalid character '\x` + strings.TrimPrefix(stdMsg, `invalid character '\u00`)
+				stdErr = errors.New(stdMsg)
+			}
 		}
 		// Text-equal.
 		assert.Equal(t, stdErr.Error(), lowErr.Error())
-- 
cgit v1.2.3-2-g168b