diff options
Diffstat (limited to 'reencode.go')
-rw-r--r-- | reencode.go | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/reencode.go b/reencode.go index 7439bf0..c19e296 100644 --- a/reencode.go +++ b/reencode.go @@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int, // but *ReEncoder does because it transforms the data written to it, // and the number of bytes written may be wildly different than the // number of bytes handled. +// +//nolint:dupl // Yes, this is mostly a duplicate of .WriteString(). func (enc *ReEncoder) Write(str []byte) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromBytes(str, n) @@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } @@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { // WriteString implements io.StringWriter; it does what you'd expect, // but see the notes on the Write method. +// +//nolint:dupl // Yes, this is mostly a duplicate of .Write(). func (enc *ReEncoder) WriteString(str string) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromString(str, n) @@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } @@ -323,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { // if enc.AllowMultipleValues is set. func (enc *ReEncoder) Close() error { if enc.bufLen > 0 { - return &ReEncodeSyntaxError{ - Offset: enc.inputPos, - Err: fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]), + if enc.utf == InvalidUTF8Error { + return &ReEncodeSyntaxError{ + Offset: enc.inputPos, + Err: fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]), + } + } + for i := 0; i < enc.bufLen; i++ { + if enc.utf == InvalidUTF8Replace { + enc.handleRune(utf8.RuneError, 1, true) + } else { + enc.handleRune(rune(enc.buf[i]), 1, false) + } + if enc.err != nil { + return enc.err + } } } if _, err := enc.par.HandleEOF(); err != nil { @@ -352,7 +370,7 @@ func (enc *ReEncoder) Close() error { // isRune=false indicates that 'c' is a raw byte from invalid UTF-8. func (enc *ReEncoder) handleRune(c rune, size int, isRune bool) { - t, err := enc.par.HandleRune(c) + t, err := enc.par.HandleRune(c, isRune) if err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, |