From f68498a6fdb421483d9aebb45527452f6255bb68 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 16:17:01 -0700 Subject: jsonparse: Don't show raw bytes as Unicode --- reencode.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index 7439bf0..8b08aad 100644 --- a/reencode.go +++ b/reencode.go @@ -352,7 +352,7 @@ func (enc *ReEncoder) Close() error { // isRune=false indicates that 'c' is a raw byte from invalid UTF-8. func (enc *ReEncoder) handleRune(c rune, size int, isRune bool) { - t, err := enc.par.HandleRune(c) + t, err := enc.par.HandleRune(c, isRune) if err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, -- cgit v1.2.3-2-g168b From 7bd0072b5896bfc4172b6bda778cf149dd6282fa Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 16:17:06 -0700 Subject: reencode: Fix the byte count for partial writes --- reencode.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index 8b08aad..fd36875 100644 --- a/reencode.go +++ b/reencode.go @@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int, // but *ReEncoder does because it transforms the data written to it, // and the number of bytes written may be wildly different than the // number of bytes handled. +// +//nolint:dupl // Yes, this is mostly a duplicate of .WriteString(). func (enc *ReEncoder) Write(str []byte) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromBytes(str, n) @@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } @@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { // WriteString implements io.StringWriter; it does what you'd expect, // but see the notes on the Write method. +// +//nolint:dupl // Yes, this is mostly a duplicate of .Write(). func (enc *ReEncoder) WriteString(str string) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromString(str, n) @@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } -- cgit v1.2.3-2-g168b From 22edcf6a68a057ed04368d5f78c8ba3ddfee8d57 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 11:11:36 -0700 Subject: reencode: Improve the error messages for trailing partial-UTF-8 --- reencode.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'reencode.go') diff --git a/reencode.go b/reencode.go index fd36875..c19e296 100644 --- a/reencode.go +++ b/reencode.go @@ -329,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { // if enc.AllowMultipleValues is set. func (enc *ReEncoder) Close() error { if enc.bufLen > 0 { - return &ReEncodeSyntaxError{ - Offset: enc.inputPos, - Err: fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]), + if enc.utf == InvalidUTF8Error { + return &ReEncodeSyntaxError{ + Offset: enc.inputPos, + Err: fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]), + } + } + for i := 0; i < enc.bufLen; i++ { + if enc.utf == InvalidUTF8Replace { + enc.handleRune(utf8.RuneError, 1, true) + } else { + enc.handleRune(rune(enc.buf[i]), 1, false) + } + if enc.err != nil { + return enc.err + } } } if _, err := enc.par.HandleEOF(); err != nil { -- cgit v1.2.3-2-g168b