diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-23 21:30:12 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-25 01:18:59 -0700 |
commit | 051f966039028d257f27fc3a42c10cbff9f7c738 (patch) | |
tree | 6ae73810c4a2959a23294c6e46d13cc7fb7034be /decode_scan.go | |
parent | d35495540df2b6d3ba16c84ce21627d9dbae000c (diff) |
decode: Include the invalid UTF-8 byte in error messages
Diffstat (limited to 'decode_scan.go')
-rw-r--r-- | decode_scan.go | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/decode_scan.go b/decode_scan.go index 63694c4..940de49 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -6,6 +6,7 @@ package lowmemjson import ( "io" + "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) @@ -55,6 +56,17 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) sc.offset += int64(sc.rSize) switch err { case nil: + invalidUTF8 := false + if sc.rRune == utf8.RuneError && sc.rSize == 1 { + if bs, ok := sc.inner.(io.ByteScanner); ok { + _ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos + b, _ := bs.ReadByte() + _ = bs.UnreadByte() + _, _, _ = sc.inner.ReadRune() + sc.rRune = rune(b) + invalidUTF8 = true + } + } sc.rType, err = sc.parser.HandleRune(sc.rRune) if err != nil { sc.rErr = &DecodeSyntaxError{ @@ -62,6 +74,9 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) Err: err, } } else { + if invalidUTF8 { + sc.rRune = utf8.RuneError + } sc.rErr = nil } switch sc.rType { |