summaryrefslogtreecommitdiff
path: root/encode_escape.go
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-01-30 21:54:38 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-01-30 21:54:38 -0700
commit8467bdaa181257d031a258a05012dc85adbcb233 (patch)
treebc9bf437a34905f0b7249352043aff9e9d80ebe8 /encode_escape.go
parent0b57145421e7e4f165f64e73ee7c5d8102945569 (diff)
parent2e48a42fb9b9e946958810cfbb90ae85bee997e4 (diff)
Merge branch 'lukeshu/quality2'
Diffstat (limited to 'encode_escape.go')
-rw-r--r--encode_escape.go103
1 files changed, 103 insertions, 0 deletions
diff --git a/encode_escape.go b/encode_escape.go
new file mode 100644
index 0000000..ab0d9c1
--- /dev/null
+++ b/encode_escape.go
@@ -0,0 +1,103 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+ "unicode/utf8"
+)
+
+// BackslashEscapeMode identifies one of the three ways that a
+// character may be represented in a JSON string:
+//
+// - literally (no backslash escaping)
+//
+// - as a short "well-known" `\X` backslash sequence (where `X` is a
+//   single character)
+//
+// - as a long Unicode `\uXXXX` backslash sequence
+type BackslashEscapeMode uint8
+
+const (
+ BackslashEscapeNone BackslashEscapeMode = iota // literal character, no backslash (this is the zero value)
+ BackslashEscapeShort // short `\X` backslash sequence
+ BackslashEscapeUnicode // long `\uXXXX` backslash sequence
+)
+
+// A BackslashEscaper controls how a ReEncoder emits a character in a
+// JSON string. The `rune` argument is the character being considered,
+// the `BackslashEscapeMode` argument is how it was originally encoded
+// in the input, and the return value is how it should be emitted.
+//
+// The ReEncoder will panic if a BackslashEscaper returns an unknown
+// BackslashEscapeMode.
+type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
+
+// EscapePreserve is a BackslashEscaper that preserves the original
+// input escaping: it ignores the character and returns wasEscaped.
+func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
+ return wasEscaped
+}
+
+// EscapeJSSafe is a BackslashEscaper that makes the JSON safe to
+// embed in JS: U+2028 and U+2029 are always written as `\uXXXX`
+// sequences, and every other character keeps its input escaping.
+//
+// JSON is notionally a JS subset, but that's not actually true; so
+// more conservative backslash-escaping is necessary to safely embed
+// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset
+func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
+	// U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR) are
+	// forced to the long form regardless of how the input wrote them.
+	if c == '\u2028' || c == '\u2029' {
+		return BackslashEscapeUnicode
+	}
+	return wasEscaped
+}
+
+// EscapeHTMLSafe is a BackslashEscaper that makes the JSON safe to
+// embed in HTML: `&`, `<`, and `>` are always written as `\uXXXX`
+// sequences; any other character is handled as by EscapeJSSafe.
+func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
+	// These three characters are never left in literal or short form.
+	if c == '&' || c == '<' || c == '>' {
+		return BackslashEscapeUnicode
+	}
+	// Everything else gets the JS-safe treatment.
+	return EscapeJSSafe(c, wasEscaped)
+}
+
+// EscapeDefault is a BackslashEscaper that mimics the default
+// behavior of encoding/json.
+//
+// It behaves like EscapeHTMLSafe, except that `\b`, `\f`, and the
+// `\uFFFD` Unicode replacement character are additionally forced to
+// long Unicode `\uXXXX` sequences.
+//
+// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
+// specified.
+func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
+	// utf8.RuneError is U+FFFD, the Unicode replacement character.
+	if c == '\b' || c == '\f' || c == utf8.RuneError {
+		return BackslashEscapeUnicode
+	}
+	// Everything else gets the HTML-safe treatment.
+	return EscapeHTMLSafe(c, wasEscaped)
+}
+
+// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
+// default behavior of an encoding/json.Encoder that has had
+// SetEscapeHTML(false) called on it.
+//
+// It behaves like EscapeJSSafe, except that `\b`, `\f`, and the
+// `\uFFFD` Unicode replacement character are additionally forced to
+// long Unicode `\uXXXX` sequences.
+func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
+	// utf8.RuneError is U+FFFD, the Unicode replacement character.
+	if c == '\b' || c == '\f' || c == utf8.RuneError {
+		return BackslashEscapeUnicode
+	}
+	// `&`, `<`, and `>` are not special-cased here; defer to JS-safe.
+	return EscapeJSSafe(c, wasEscaped)
+}