// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "encoding/json" "io" "reflect" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal" ) var ( numberType = reflect.TypeOf(json.Number("")) byteType = reflect.TypeOf(byte(0)) byteSliceType = reflect.TypeOf(([]byte)(nil)) ) // generic I/O ///////////////////////////////////////////////////////////////// func writeByte(w io.Writer, c byte) error { if br, ok := w.(interface{ WriteByte(byte) error }); ok { return br.WriteByte(c) } var buf [1]byte buf[0] = c if _, err := w.Write(buf[:]); err != nil { return err } return nil } func writeRune(w io.Writer, c rune) (int, error) { if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { return rw.WriteRune(c) } var buf [utf8.UTFMax]byte n := utf8.EncodeRune(buf[:], c) return w.Write(buf[:n]) } // JSON string encoding //////////////////////////////////////////////////////// // BackslashEscapeMode identifies one of the three ways that a // character may be represented in a JSON string: // // - literally (no backslash escaping) // // - as a short "well-known" `\X` backslash sequence (where `X` is a // single-character) // // - as a long Unicode `\uXXXX` backslash sequence type BackslashEscapeMode uint8 const ( BackslashEscapeNone BackslashEscapeMode = iota BackslashEscapeShort BackslashEscapeUnicode ) // A BackslashEscaper controls how a ReEncoder emits a character in a // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was // originally encoded in the input. // // The ReEncoder will panic if a BackslashEscaper returns an unknown // BackslashEscapeMode. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode // EscapePreserve is a BackslashEscaper that preserves the original // input escaping. 
func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	return wasEscaped
}

// EscapeJSSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in JS; it otherwise preserves the
// original input escaping.
//
// JSON is notionally a JS subset, but that's not actually true
// (U+2028 and U+2029 are the problem characters), so more
// conservative backslash-escaping is necessary to safely embed it in
// JS.  http://timelessrepo.com/json-isnt-a-javascript-subset
func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\u2028' || c == '\u2029' {
		return BackslashEscapeUnicode
	}
	return wasEscaped
}

// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in HTML: `&`, `<`, and `>` are emitted as
// `\uXXXX` sequences.  For every other character it behaves like
// EscapeJSSafe.
func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '&' || c == '<' || c == '>' {
		return BackslashEscapeUnicode
	}
	return EscapeJSSafe(c, wasEscaped)
}

// EscapeDefault is a BackslashEscaper that mimics the default
// behavior of encoding/json.
//
// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
// character.
//
// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
// specified.
func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\b' || c == '\f' || c == utf8.RuneError {
		return BackslashEscapeUnicode
	}
	return EscapeHTMLSafe(c, wasEscaped)
}

// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
// default behavior of an encoding/json.Encoder that has had
// SetEscapeHTML(false) called on it.
//
// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
// character.
func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: return BackslashEscapeUnicode default: return EscapeJSSafe(c, wasEscaped) } } func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { buf := [6]byte{ '\\', 'u', internal.Hex[(c>>12)&0xf], internal.Hex[(c>>8)&0xf], internal.Hex[(c>>4)&0xf], internal.Hex[(c>>0)&0xf], } return w.Write(buf[:]) } func writeStringShortEscape(w io.Writer, c rune) (int, error) { var b byte switch c { case '"', '\\', '/': b = byte(c) case '\b': b = 'b' case '\f': b = 'f' case '\n': b = 'n' case '\r': b = 'r' case '\t': b = 't' default: panic("should not happen") } buf := [2]byte{'\\', b} return w.Write(buf[:]) } func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } switch escaper(c, wasEscaped) { case BackslashEscapeNone: switch { case c < 0x0020: // override, gotta escape these switch c { case '\b', '\f', '\n', '\r', '\t': // short-escape if possible return writeStringShortEscape(w, c) default: return writeStringUnicodeEscape(w, c) } case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey return writeRune(w, c) } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these return writeRune(w, c) } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) return writeRune(w, c) default: // obey return writeStringUnicodeEscape(w, c) } default: panic("escaper returned an invalid escape mode") } }