summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-25 11:11:36 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-25 20:40:57 -0700
commit 22edcf6a68a057ed04368d5f78c8ba3ddfee8d57 (patch)
tree 62a21db8d3241bcf264bcf0874df632c4ce9ba94
parent 7bd0072b5896bfc4172b6bda778cf149dd6282fa (diff)
reencode: Improve the error messages for trailing partial-UTF-8
-rw-r--r-- ReleaseNotes.md 4
-rw-r--r-- compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 2
-rw-r--r-- reencode.go 18
-rw-r--r-- reencode_test.go 39
4 files changed, 59 insertions, 4 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index c9d1233..71973aa 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -33,6 +33,10 @@
`.WriteString`. This only comes up if there is an I/O causing a
partial write.
+ - Bugfix: ReEncoder: The error messages for trailing partial UTF-8
+ now reflect the `InvalidUTF8` setting, rather than simply saying
+ "unflushed unicode garbage".
+
# v0.3.7 (2023-02-20)
Theme: Fixes from fuzzing (part 1?)
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118
new file mode 100644
index 0000000..77924f3
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\xf0")
diff --git a/reencode.go b/reencode.go
index fd36875..c19e296 100644
--- a/reencode.go
+++ b/reencode.go
@@ -329,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
// if enc.AllowMultipleValues is set.
func (enc *ReEncoder) Close() error {
if enc.bufLen > 0 {
- return &ReEncodeSyntaxError{
- Offset: enc.inputPos,
- Err: fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]),
+ if enc.utf == InvalidUTF8Error {
+ return &ReEncodeSyntaxError{
+ Offset: enc.inputPos,
+ Err: fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]),
+ }
+ }
+ for i := 0; i < enc.bufLen; i++ {
+ if enc.utf == InvalidUTF8Replace {
+ enc.handleRune(utf8.RuneError, 1, true)
+ } else {
+ enc.handleRune(rune(enc.buf[i]), 1, false)
+ }
+ if enc.err != nil {
+ return enc.err
+ }
}
}
if _, err := enc.par.HandleEOF(); err != nil {
diff --git a/reencode_test.go b/reencode_test.go
index feabde5..60180c8 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -15,7 +15,7 @@ import (
"git.lukeshu.com/go/lowmemjson/internal/fastio"
)
-func TestReEncode(t *testing.T) {
+func TestEncodeReEncode(t *testing.T) {
t.Parallel()
type testcase struct {
enc ReEncoderConfig
@@ -168,6 +168,43 @@ func TestReEncode(t *testing.T) {
}
}
+func TestReEncode(t *testing.T) {
+ t.Parallel()
+ type testcase struct {
+ Cfg ReEncoderConfig
+ In string
+ ExpOut string
+ ExpWriteErr string
+ ExpCloseErr string
+ }
+ testcases := map[string]testcase{
+ "partial-utf8-replace": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Replace}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: "json: syntax error at input byte 0: invalid character '\uFFFD' looking for beginning of value"},
+ "partial-utf8-preserve": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Preserve}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: invalid character '\xf0' looking for beginning of value`},
+ "partial-utf8-error": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Error}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: truncated UTF-8: "\xf0\xbf"`},
+ }
+ for tcName, tc := range testcases {
+ tc := tc
+ t.Run(tcName, func(t *testing.T) {
+ t.Parallel()
+ var out strings.Builder
+ enc := NewReEncoder(&out, tc.Cfg)
+ _, err := enc.WriteString(tc.In)
+ assert.Equal(t, tc.ExpOut, out.String())
+ if tc.ExpWriteErr == "" {
+ assert.NoError(t, err)
+ } else {
+ assert.EqualError(t, err, tc.ExpWriteErr)
+ }
+ err = enc.Close()
+ if tc.ExpCloseErr == "" {
+ assert.NoError(t, err)
+ } else {
+ assert.EqualError(t, err, tc.ExpCloseErr)
+ }
+ })
+ }
+}
+
func TestReEncodeWriteSize(t *testing.T) {
t.Parallel()