diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-25 11:11:36 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-25 20:40:57 -0700 |
commit | 22edcf6a68a057ed04368d5f78c8ba3ddfee8d57 (patch) | |
tree | 62a21db8d3241bcf264bcf0874df632c4ce9ba94 | |
parent | 7bd0072b5896bfc4172b6bda778cf149dd6282fa (diff) |
reencode: Improve the error messages for trailing partial-UTF-8
-rw-r--r-- | ReleaseNotes.md | 4 | ||||
-rw-r--r-- | compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 | 2 | ||||
-rw-r--r-- | reencode.go | 18 | ||||
-rw-r--r-- | reencode_test.go | 39 |
4 files changed, 59 insertions, 4 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md index c9d1233..71973aa 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -33,6 +33,10 @@ `.WriteString`. This only comes up if there is an I/O causing a partial write. + - Bugfix: ReEncoder: The error messages for trailing partial UTF-8 + now reflect the `InvalidUTF8` setting, rather than simply saying + "unflushed unicode garbage". + # v0.3.7 (2023-02-20) Theme: Fixes from fuzzing (part 1?) diff --git a/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 new file mode 100644 index 0000000..77924f3 --- /dev/null +++ b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("\xf0") diff --git a/reencode.go b/reencode.go index fd36875..c19e296 100644 --- a/reencode.go +++ b/reencode.go @@ -329,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { // if enc.AllowMultipleValues is set. func (enc *ReEncoder) Close() error { if enc.bufLen > 0 { - return &ReEncodeSyntaxError{ - Offset: enc.inputPos, - Err: fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]), + if enc.utf == InvalidUTF8Error { + return &ReEncodeSyntaxError{ + Offset: enc.inputPos, + Err: fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]), + } + } + for i := 0; i < enc.bufLen; i++ { + if enc.utf == InvalidUTF8Replace { + enc.handleRune(utf8.RuneError, 1, true) + } else { + enc.handleRune(rune(enc.buf[i]), 1, false) + } + if enc.err != nil { + return enc.err + } } } if _, err := enc.par.HandleEOF(); err != nil { diff --git a/reencode_test.go b/reencode_test.go index feabde5..60180c8 100644 --- a/reencode_test.go +++ b/reencode_test.go @@ -15,7 +15,7 @@ import ( "git.lukeshu.com/go/lowmemjson/internal/fastio" ) -func TestReEncode(t *testing.T) { +func TestEncodeReEncode(t *testing.T) { t.Parallel() type testcase struct { enc ReEncoderConfig @@ -168,6 +168,43 @@ func TestReEncode(t *testing.T) { } } +func TestReEncode(t *testing.T) { + t.Parallel() + type testcase struct { + Cfg ReEncoderConfig + In string + ExpOut string + ExpWriteErr string + ExpCloseErr string + } + testcases := map[string]testcase{ + "partial-utf8-replace": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Replace}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: "json: syntax error at input byte 0: invalid character '\uFFFD' looking for beginning of value"}, + "partial-utf8-preserve": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Preserve}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: invalid character '\xf0' looking for beginning of value`}, + "partial-utf8-error": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Error}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: truncated UTF-8: "\xf0\xbf"`}, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var out strings.Builder + enc := NewReEncoder(&out, tc.Cfg) + _, err := enc.WriteString(tc.In) + assert.Equal(t, tc.ExpOut, out.String()) + if tc.ExpWriteErr == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.ExpWriteErr) + } + err = enc.Close() + if tc.ExpCloseErr == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tc.ExpCloseErr) + } + }) + } +} + func TestReEncodeWriteSize(t *testing.T) { t.Parallel() |