diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-26 22:31:32 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-29 21:01:42 -0700 |
commit | ff6dc0bc519886905e758a84e572f5e34d6c03d1 (patch) | |
tree | 8906c4b8bb0c7dc468419efc17b872db62a4e068 /misc.go | |
parent | d1b5bc1f05624614f43ef85597f4aa9d7a166d23 (diff) |
Move things between files
Diffstat (limited to 'misc.go')
-rw-r--r-- | misc.go | 211 |
1 files changed, 0 insertions, 211 deletions
diff --git a/misc.go b/misc.go
deleted file mode 100644
index fb96b4e..0000000
--- a/misc.go
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package lowmemjson
-
-import (
-	"encoding/json"
-	"io"
-	"reflect"
-	"unicode/utf8"
-
-	"git.lukeshu.com/go/lowmemjson/internal"
-)
-
-var (
-	numberType    = reflect.TypeOf(json.Number(""))
-	byteType      = reflect.TypeOf(byte(0))
-	byteSliceType = reflect.TypeOf(([]byte)(nil))
-)
-
-// generic I/O /////////////////////////////////////////////////////////////////
-
-func writeByte(w io.Writer, c byte) error {
-	if br, ok := w.(interface{ WriteByte(byte) error }); ok {
-		return br.WriteByte(c)
-	}
-	var buf [1]byte
-	buf[0] = c
-	if _, err := w.Write(buf[:]); err != nil {
-		return err
-	}
-	return nil
-}
-
-func writeRune(w io.Writer, c rune) (int, error) {
-	if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok {
-		return rw.WriteRune(c)
-	}
-	var buf [utf8.UTFMax]byte
-	n := utf8.EncodeRune(buf[:], c)
-	return w.Write(buf[:n])
-}
-
-// JSON string encoding ////////////////////////////////////////////////////////
-
-// BackslashEscapeMode identifies one of the three ways that a
-// character may be represented in a JSON string:
-//
-// - literally (no backslash escaping)
-//
-// - as a short "well-known" `\X` backslash sequence (where `X` is a
-// single-character)
-//
-// - as a long Unicode `\uXXXX` backslash sequence
-type BackslashEscapeMode uint8
-
-const (
-	BackslashEscapeNone BackslashEscapeMode = iota
-	BackslashEscapeShort
-	BackslashEscapeUnicode
-)
-
-// A BackslashEscaper controls how a ReEncoder emits a character in a
-// JSON string. The `rune` argument is the character being
-// considered, and the `BackslashEscapeMode` argument is how it was
-// originally encoded in the input.
-//
-// The ReEncoder will panic if a BackslashEscaper returns an unknown
-// BackslashEscapeMode.
-type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
-
-// EscapePreserve is a BackslashEscaper that preserves the original
-// input escaping.
-func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	return wasEscaped
-}
-
-// EscapeJSSafe is a BackslashEscaper that escapes strings such that
-// the JSON safe to embed in JS; it otherwise preserves the original
-// input escaping.
-//
-// JSON is notionally a JS subset, but that's not actually true; so
-// more conservative backslash-escaping is necessary to safely embed
-// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset
-func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	switch c {
-	case '\u2028', '\u2029':
-		return BackslashEscapeUnicode
-	default:
-		return wasEscaped
-	}
-}
-
-// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
-// the JSON is safe to embed in HTML; it otherwise preserves the
-// original input escaping.
-func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	switch c {
-	case '&', '<', '>':
-		return BackslashEscapeUnicode
-	default:
-		return EscapeJSSafe(c, wasEscaped)
-	}
-}
-
-// EscapeDefault is a BackslashEscaper that mimics the default
-// behavior of encoding/json.
-//
-// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX`
-// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
-// character.
-//
-// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
-// specified.
-func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	switch c {
-	case '\b', '\f', utf8.RuneError:
-		return BackslashEscapeUnicode
-	default:
-		return EscapeHTMLSafe(c, wasEscaped)
-	}
-}
-
-// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
-// default behavior of an encoding/json.Encoder that has had
-// SetEscapeHTML(false) called on it.
-//
-// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX`
-// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
-// character.
-func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	switch c {
-	case '\b', '\f', utf8.RuneError:
-		return BackslashEscapeUnicode
-	default:
-		return EscapeJSSafe(c, wasEscaped)
-	}
-}
-
-func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
-	buf := [6]byte{
-		'\\',
-		'u',
-		internal.Hex[(c>>12)&0xf],
-		internal.Hex[(c>>8)&0xf],
-		internal.Hex[(c>>4)&0xf],
-		internal.Hex[(c>>0)&0xf],
-	}
-	return w.Write(buf[:])
-}
-
-func writeStringShortEscape(w io.Writer, c rune) (int, error) {
-	var b byte
-	switch c {
-	case '"', '\\', '/':
-		b = byte(c)
-	case '\b':
-		b = 'b'
-	case '\f':
-		b = 'f'
-	case '\n':
-		b = 'n'
-	case '\r':
-		b = 'r'
-	case '\t':
-		b = 't'
-	default:
-		panic("should not happen")
-	}
-	buf := [2]byte{'\\', b}
-	return w.Write(buf[:])
-}
-
-func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) {
-	if escaper == nil {
-		escaper = EscapeDefault
-	}
-	switch escaper(c, wasEscaped) {
-	case BackslashEscapeNone:
-		switch {
-		case c < 0x0020: // override, gotta escape these
-			switch c {
-			case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
-				return writeStringShortEscape(w, c)
-			default:
-				return writeStringUnicodeEscape(w, c)
-			}
-		case c == '"' || c == '\\': // override, gotta escape these
-			return writeStringShortEscape(w, c)
-		default: // obey
-			return writeRune(w, c)
-		}
-	case BackslashEscapeShort:
-		switch c {
-		case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
-			return writeStringShortEscape(w, c)
-		default: // override, can't short-escape these
-			return writeRune(w, c)
-		}
-	case BackslashEscapeUnicode:
-		switch {
-		case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
-			return writeRune(w, c)
-		default: // obey
-			return writeStringUnicodeEscape(w, c)
-		}
-	default:
-		panic("escaper returned an invalid escape mode")
-	}
-}