summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: Luke Shumaker <lukeshu@lukeshu.com> 2023-02-14 22:36:25 -0700
committer: Luke Shumaker <lukeshu@lukeshu.com> 2023-02-18 22:45:54 -0700
commit: dfc67cecbd95344d296c31b537fa3ae8aec8c292 (patch)
tree: 1e2e820cbd288d1ebef7b0e9dea14a07e2f33fc5 /internal
parent: 38989a9c4f69abfe04c3eb4ec3382be88802141c (diff)
encode, reencode: Fix handling of invalid UTF-8
Diffstat (limited to 'internal')
-rw-r--r-- internal/jsonstring/encode_string.go 65
1 files changed, 59 insertions, 6 deletions
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
index fec2cc0..76bbb38 100644
--- a/internal/jsonstring/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -5,14 +5,25 @@
package jsonstring
import (
+ "encoding/json"
"fmt"
"io"
+ "reflect"
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/fastio/noescape"
)
+// InvalidUTF8Mode is described in the main lowmemjson package docs.
+type InvalidUTF8Mode uint8
+
+const (
+ InvalidUTF8Replace InvalidUTF8Mode = iota
+ InvalidUTF8Preserve
+ InvalidUTF8Error
+)
+
// BackslashEscapeMode is describe in the main lowmemjson package
// docs.
type BackslashEscapeMode uint8
@@ -21,6 +32,7 @@ const (
BackslashEscapeNone BackslashEscapeMode = iota
BackslashEscapeShort
BackslashEscapeUnicode
+ BackslashEscapeRawByte
)
// BackslashEscaper is describe in the main lowmemjson package docs.
@@ -96,19 +108,45 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err
default: // obey
return writeStringUnicodeEscape(w, c)
}
+ case BackslashEscapeRawByte:
+ switch {
+ case c < utf8.RuneSelf:
+ panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q < utf8.RuneSelf", c))
+ case c > 0xFF:
+ panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q > 0xFF", c))
+ default:
+ return w.WriteByte(byte(c))
+ }
default:
- panic("escaper returned an invalid escape mode")
+ panic(fmt.Errorf("escaper returned an invalid escape mode=%d", escape))
}
}
-func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
+func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str string) error {
if err := w.WriteByte('"'); err != nil {
return err
}
- for _, c := range str {
- if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+ for i := 0; i < len(str); {
+ escaped := BackslashEscapeNone
+ c, size := utf8.DecodeRuneInString(str[i:])
+ if c == utf8.RuneError && size == 1 {
+ switch utf {
+ case InvalidUTF8Replace:
+ escaped = BackslashEscapeUnicode
+ case InvalidUTF8Preserve:
+ escaped = BackslashEscapeRawByte
+ c = rune(str[i])
+ case InvalidUTF8Error:
+ return &json.UnsupportedValueError{
+ Value: val,
+ Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]),
+ }
+ }
+ }
+ if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil {
return err
}
+ i += size
}
if err := w.WriteByte('"'); err != nil {
return err
@@ -116,13 +154,28 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st
return nil
}
-func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
+func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str []byte) error {
if err := w.WriteByte('"'); err != nil {
return err
}
for i := 0; i < len(str); {
+ escaped := BackslashEscapeNone
c, size := utf8.DecodeRune(str[i:])
- if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+ if c == utf8.RuneError && size == 1 {
+ switch utf {
+ case InvalidUTF8Replace:
+ escaped = BackslashEscapeUnicode
+ case InvalidUTF8Preserve:
+ escaped = BackslashEscapeRawByte
+ c = rune(str[i])
+ case InvalidUTF8Error:
+ return &json.UnsupportedValueError{
+ Value: val,
+ Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]),
+ }
+ }
+ }
+ if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil {
return err
}
i += size