diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-30 21:54:38 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-30 21:54:38 -0700 |
commit | 8467bdaa181257d031a258a05012dc85adbcb233 (patch) | |
tree | bc9bf437a34905f0b7249352043aff9e9d80ebe8 /encode_escape.go | |
parent | 0b57145421e7e4f165f64e73ee7c5d8102945569 (diff) | |
parent | 2e48a42fb9b9e946958810cfbb90ae85bee997e4 (diff) |
Merge branch 'lukeshu/quality2'
Diffstat (limited to 'encode_escape.go')
-rw-r--r-- | encode_escape.go | 103 |
1 files changed, 103 insertions, 0 deletions
// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

// BackslashEscapeMode identifies one of the three ways that a
// character may be represented in a JSON string:
//
//   - literally (no backslash escaping)
//
//   - as a short "well-known" `\X` backslash sequence (where `X` is a
//     single character)
//
//   - as a long Unicode `\uXXXX` backslash sequence
type BackslashEscapeMode uint8

const (
	// BackslashEscapeNone means the character is written literally,
	// with no backslash escaping.
	BackslashEscapeNone BackslashEscapeMode = iota

	// BackslashEscapeShort means the character is written as a short
	// "well-known" `\X` backslash sequence.
	BackslashEscapeShort

	// BackslashEscapeUnicode means the character is written as a long
	// Unicode `\uXXXX` backslash sequence.
	BackslashEscapeUnicode
)

// A BackslashEscaper controls how a ReEncoder emits a character in a
// JSON string.  The `rune` argument is the character being
// considered, and the `BackslashEscapeMode` argument is how it was
// originally encoded in the input.  The return value is the mode to
// use when writing the character to the output.
//
// The ReEncoder will panic if a BackslashEscaper returns an unknown
// BackslashEscapeMode.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode

// EscapePreserve is a BackslashEscaper that preserves the original
// input escaping: it ignores the character and returns wasEscaped
// unchanged.
func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	return wasEscaped
}

// EscapeJSSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in JS; it otherwise preserves the
// original input escaping.
//
// JSON is notionally a JS subset, but that's not actually true; so
// more conservative backslash-escaping is necessary to safely embed
// it in JS.  See http://timelessrepo.com/json-isnt-a-javascript-subset
//
// Concretely, U+2028 and U+2029 are always emitted as `\uXXXX`
// sequences.
func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	switch c {
	case '\u2028', '\u2029':
		return BackslashEscapeUnicode
	default:
		return wasEscaped
	}
}

// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in HTML; it otherwise preserves the
// original input escaping.
//
// In addition to everything that EscapeJSSafe escapes, `&`, `<`, and
// `>` are always emitted as `\uXXXX` sequences.
func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	switch c {
	case '&', '<', '>':
		return BackslashEscapeUnicode
	default:
		// Everything else gets the JS-safe treatment.
		return EscapeJSSafe(c, wasEscaped)
	}
}

// EscapeDefault is a BackslashEscaper that mimics the default
// behavior of encoding/json.
//
// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
// character.
//
// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
// specified.
//
// NOTE(review): newer encoding/json releases (Go 1.22+, per its
// release notes) reportedly emit the short `\b` and `\f` forms; this
// function keeps the long forms -- confirm which behavior is wanted.
func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	switch c {
	case '\b', '\f', utf8.RuneError:
		return BackslashEscapeUnicode
	default:
		return EscapeHTMLSafe(c, wasEscaped)
	}
}

// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
// default behavior of an encoding/json.Encoder that has had
// SetEscapeHTML(false) called on it.
//
// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
// character.  Unlike EscapeDefault, it does not force escaping of
// `&`, `<`, or `>`.
func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	switch c {
	case '\b', '\f', utf8.RuneError:
		return BackslashEscapeUnicode
	default:
		return EscapeJSSafe(c, wasEscaped)
	}
}