// Copyright (C) 2022 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "encoding/json" "io" "reflect" "unicode/utf8" ) const Tab = "\t" const hex = "0123456789abcdef" func hex2int[T interface{ byte | rune }](c T) (byte, bool) { switch { case '0' <= c && c <= '9': return byte(c) - '0', true case 'a' <= c && c <= 'f': return byte(c) - 'a' + 10, true case 'A' <= c && c <= 'F': return byte(c) - 'A' + 10, true default: return 0, false } } var ( numberType = reflect.TypeOf(json.Number("")) byteType = reflect.TypeOf(byte(0)) byteSliceType = reflect.TypeOf(([]byte)(nil)) ) // generic I/O ///////////////////////////////////////////////////////////////// func writeByte(w io.Writer, c byte) error { if br, ok := w.(interface{ WriteByte(byte) error }); ok { return br.WriteByte(c) } var buf [1]byte buf[0] = c if _, err := w.Write(buf[:]); err != nil { return err } return nil } func writeRune(w io.Writer, c rune) (int, error) { if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { return rw.WriteRune(c) } var buf [utf8.UTFMax]byte n := utf8.EncodeRune(buf[:], c) return w.Write(buf[:n]) } // JSON string encoding //////////////////////////////////////////////////////// type BackslashEscapeMode uint8 const ( BackslashEscapeNone = BackslashEscapeMode(iota) BackslashEscapeShort BackslashEscapeUnicode ) type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { return wasEscaped } func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { // JSON is notionally a JS subset, but that's not actually // true. // // http://timelessrepo.com/json-isnt-a-javascript-subset switch c { case '\u2028', '\u2029': return BackslashEscapeUnicode default: return wasEscaped } } func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '&', '<', '>': return BackslashEscapeUnicode default: return EscapeJSSafe(c, wasEscaped) } } func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: return BackslashEscapeUnicode default: return EscapeHTMLSafe(c, wasEscaped) } } func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: return BackslashEscapeUnicode default: return EscapeJSSafe(c, wasEscaped) } } func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { buf := [6]byte{ '\\', 'u', hex[(c>>12)&0xf], hex[(c>>8)&0xf], hex[(c>>4)&0xf], hex[(c>>0)&0xf], } return w.Write(buf[:]) } func writeStringShortEscape(w io.Writer, c rune) (int, error) { var b byte switch c { case '"', '\\', '/': b = byte(c) case '\b': b = 'b' case '\f': b = 'f' case '\n': b = 'n' case '\r': b = 'r' case '\t': b = 't' default: panic("should not happen") } buf := [2]byte{'\\', b} return w.Write(buf[:]) } func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } switch escaper(c, wasEscaped) { case BackslashEscapeNone: switch { case c < 0x0020: // override, gotta escape these switch c { case '\b', '\f', '\n', '\r', '\t': // short-escape if possible return writeStringShortEscape(w, c) default: return writeStringUnicodeEscape(w, c) } case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey return writeRune(w, c) } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these return writeRune(w, c) } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) return writeRune(w, c) default: // obey return writeStringUnicodeEscape(w, c) } default: panic("escaper returned an invalid escape mode") } }