// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package jsonstring import ( "encoding/json" "fmt" "io" "reflect" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/fastio/noescape" ) // InvalidUTF8Mode is describe in the main lowmemjson package docs. type InvalidUTF8Mode uint8 const ( InvalidUTF8Replace InvalidUTF8Mode = iota InvalidUTF8Preserve InvalidUTF8Error ) // BackslashEscapeMode is describe in the main lowmemjson package // docs. type BackslashEscapeMode uint8 const ( BackslashEscapeNone BackslashEscapeMode = iota BackslashEscapeShort BackslashEscapeRawByte // It is significant to the implementation that if X=binary-0 // and x=binary-1, then these "BackslashEscapeUnicode" // constants are counting in-order from 0 to 15. BackslashEscapeUnicodeXXXX BackslashEscapeUnicodeXXXx BackslashEscapeUnicodeXXxX BackslashEscapeUnicodeXXxx BackslashEscapeUnicodeXxXX BackslashEscapeUnicodeXxXx BackslashEscapeUnicodeXxxX BackslashEscapeUnicodeXxxx BackslashEscapeUnicodexXXX BackslashEscapeUnicodexXXx BackslashEscapeUnicodexXxX BackslashEscapeUnicodexXxx BackslashEscapeUnicodexxXX BackslashEscapeUnicodexxXx BackslashEscapeUnicodexxxX BackslashEscapeUnicodexxxx BackslashEscapeUnicodeMin = BackslashEscapeUnicodeXXXX BackslashEscapeUnicodeMax = BackslashEscapeUnicodexxxx BackslashEscapeUnicode = BackslashEscapeUnicodexxxx // back-compat ) // BackslashEscaper is describe in the main lowmemjson package docs. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode func WriteStringUnicodeEscape(w io.Writer, c rune, mode BackslashEscapeMode) error { const alphabet = "0123456789ABCDEF" _mode := byte(mode - BackslashEscapeUnicodeMin) buf := [6]byte{ '\\', 'u', // The 0b0010_0000 bit is the ASCII "lowercase bit". alphabet[(c>>12)&0xf] | ((_mode << 2) & 0b0010_0000), alphabet[(c>>8)&0xf] | ((_mode << 3) & 0b0010_0000), alphabet[(c>>4)&0xf] | ((_mode << 4) & 0b0010_0000), alphabet[(c>>0)&0xf] | ((_mode << 5) & 0b0010_0000), } _, err := noescape.Write(w, buf[:]) return err } func writeStringShortEscape(w io.Writer, c rune) error { var b byte switch c { case '"', '\\', '/': b = byte(c) case '\b': b = 'b' case '\f': b = 'f' case '\n': b = 'n' case '\r': b = 'r' case '\t': b = 't' default: panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) } buf := [2]byte{'\\', b} _, err := noescape.Write(w, buf[:]) return err } func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { switch escape { case BackslashEscapeNone: switch { case c < 0x0020: // override, gotta escape these switch c { case '\b', '\f', '\n', '\r', '\t': // short-escape if possible return writeStringShortEscape(w, c) default: return WriteStringUnicodeEscape(w, c, BackslashEscapeUnicode) } case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey _, err := w.WriteRune(c) return err } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these _, err := w.WriteRune(c) return err } case BackslashEscapeRawByte: switch { case c < utf8.RuneSelf: panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q < utf8.RuneSelf", c)) case c > 0xFF: panic(fmt.Errorf("escaper returned BackslashEscapeRawByte for a character=%q > 0xFF", c)) default: return w.WriteByte(byte(c)) } default: if BackslashEscapeUnicodeMin <= escape && escape <= BackslashEscapeUnicodeMax { switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) _, err := w.WriteRune(c) return err default: // obey return WriteStringUnicodeEscape(w, c, escape) } } panic(fmt.Errorf("escaper returned an invalid escape mode=%d", escape)) } } func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str string) error { if err := w.WriteByte('"'); err != nil { return err } for i := 0; i < len(str); { escaped := BackslashEscapeNone c, size := utf8.DecodeRuneInString(str[i:]) if c == utf8.RuneError && size == 1 { switch utf { case InvalidUTF8Replace: escaped = BackslashEscapeUnicode case InvalidUTF8Preserve: escaped = BackslashEscapeRawByte c = rune(str[i]) case InvalidUTF8Error: return &json.UnsupportedValueError{ Value: val, Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]), } } } if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil { return err } i += size } if err := w.WriteByte('"'); err != nil { return err } return nil } func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, utf InvalidUTF8Mode, val reflect.Value, str []byte) error { if err := w.WriteByte('"'); err != nil { return err } for i := 0; i < len(str); { escaped := BackslashEscapeNone c, size := utf8.DecodeRune(str[i:]) if c == utf8.RuneError && size == 1 { switch utf { case InvalidUTF8Replace: escaped = BackslashEscapeUnicode case InvalidUTF8Preserve: escaped = BackslashEscapeRawByte c = rune(str[i]) case InvalidUTF8Error: return &json.UnsupportedValueError{ Value: val, Str: fmt.Sprintf("invalid UTF-8 at byte offset %d: %#02x", i, str[i]), } } } if err := WriteStringChar(w, c, escaper(c, escaped)); err != nil { return err } i += size } if err := w.WriteByte('"'); err != nil { return err } return nil }