diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-14 22:36:25 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-18 22:45:54 -0700 |
commit | dfc67cecbd95344d296c31b537fa3ae8aec8c292 (patch) | |
tree | 1e2e820cbd288d1ebef7b0e9dea14a07e2f33fc5 /internal | |
parent | 38989a9c4f69abfe04c3eb4ec3382be88802141c (diff) |
encode, reencode: Fix handling of invalid UTF-8
Diffstat (limited to 'internal')
-rw-r--r-- | internal/jsonstring/encode_string.go | 65 |
1 files changed, 59 insertions, 6 deletions
```diff
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
index fec2cc0..76bbb38 100644
--- a/internal/jsonstring/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -5,14 +5,25 @@
 package jsonstring
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
+	"reflect"
 	"unicode/utf8"
 
 	"git.lukeshu.com/go/lowmemjson/internal/fastio"
 	"git.lukeshu.com/go/lowmemjson/internal/fastio/noescape"
 )
 
+// InvalidUTF8Mode is describe in the main lowmemjson package docs.
+type InvalidUTF8Mode uint8
+
+const (
+	InvalidUTF8Replace InvalidUTF8Mode = iota
+	InvalidUTF8Preserve
+	InvalidUTF8Error
+)
+
 // BackslashEscapeMode is describe in the main lowmemjson package
 // docs.
 type BackslashEscapeMode uint8
@@ -21,6 +32,7 @@ const (
 	BackslashEscapeNone BackslashEscapeMode = iota
 	BackslashEscapeShort
 	BackslashEscapeUnicode
+	BackslashEscapeRawByte
 )
 
 // BackslashEscaper is describe in the main lowmemjson package docs.
@@ -96,19 +108,45 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err
 		default: // obey
 			return writeStringUnicodeEscape(w, c)
 		}
+	case BackslashEscapeRawByte:
+		switch {
+		case c < utf8.RuneSelf:
+			panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q < utf8.RuneSelf", c))
+		case c > 0xFF:
+			panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q > 0xFF", c))
+		default:
+			return w.WriteByte(byte(c))
+		}
 	default:
-		panic("escaper returned an invalid escape mode")
+		panic(fmt.Errorf("escaper returned an invalid escape mode=%d", escape))
 	}
 }
 
-func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
+func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str string) error {
 	if err := w.WriteByte('"'); err != nil {
 		return err
 	}
-	for _, c := range str {
-		if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+	for i := 0; i < len(str); {
+		escaped := BackslashEscapeNone
+		c, size := utf8.DecodeRuneInString(str[i:])
+		if c == utf8.RuneError && size == 1 {
+			switch utf {
+			case InvalidUTF8Replace:
+				escaped = BackslashEscapeUnicode
+			case InvalidUTF8Preserve:
+				escaped = BackslashEscapeRawByte
+				c = rune(str[i])
+			case InvalidUTF8Error:
+				return &json.UnsupportedValueError{
+					Value: val,
+					Str:   fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]),
+				}
+			}
+		}
+		if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil {
 			return err
 		}
+		i += size
 	}
 	if err := w.WriteByte('"'); err != nil {
 		return err
@@ -116,13 +154,28 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st
 	return nil
 }
 
-func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
+func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str []byte) error {
 	if err := w.WriteByte('"'); err != nil {
 		return err
 	}
 	for i := 0; i < len(str); {
+		escaped := BackslashEscapeNone
 		c, size := utf8.DecodeRune(str[i:])
-		if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+		if c == utf8.RuneError && size == 1 {
+			switch utf {
+			case InvalidUTF8Replace:
+				escaped = BackslashEscapeUnicode
+			case InvalidUTF8Preserve:
+				escaped = BackslashEscapeRawByte
+				c = rune(str[i])
+			case InvalidUTF8Error:
+				return &json.UnsupportedValueError{
+					Value: val,
+					Str:   fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]),
+				}
+			}
+		}
+		if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil {
 			return err
 		}
 		i += size
```