// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "unicode/utf8" ) // BackslashEscapeMode identifies one of the three ways that a // character may be represented in a JSON string: // // - literally (no backslash escaping) // // - as a short "well-known" `\X` backslash sequence (where `X` is a // single-character) // // - as a long Unicode `\uXXXX` backslash sequence type BackslashEscapeMode uint8 const ( BackslashEscapeNone BackslashEscapeMode = iota BackslashEscapeShort BackslashEscapeUnicode ) // A BackslashEscaper controls how a ReEncoder emits a character in a // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was // originally encoded in the input. // // The ReEncoder will panic if a BackslashEscaper returns an unknown // BackslashEscapeMode. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode // EscapePreserve is a BackslashEscaper that preserves the original // input escaping. func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { return wasEscaped } // EscapeJSSafe is a BackslashEscaper that escapes strings such that // the JSON safe to embed in JS; it otherwise preserves the original // input escaping. // // JSON is notionally a JS subset, but that's not actually true; so // more conservative backslash-escaping is necessary to safely embed // it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\u2028', '\u2029': return BackslashEscapeUnicode default: return wasEscaped } } // EscapeHTMLSafe is a BackslashEscaper that escapes strings such that // the JSON is safe to embed in HTML; it otherwise preserves the // original input escaping. func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '&', '<', '>': return BackslashEscapeUnicode default: return EscapeJSSafe(c, wasEscaped) } } // EscapeDefault is a BackslashEscaper that mimics the default // behavior of encoding/json. // // It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX` // sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement // character. // // A ReEncoder uses EscapeDefault if a BackslashEscaper is not // specified. func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: return BackslashEscapeUnicode default: return EscapeHTMLSafe(c, wasEscaped) } } // EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the // default behavior of an encoding/json.Encoder that has had // SetEscapeHTML(false) called on it. // // It is like EscapeJSSafe, but also uses long Unicode `\uXXXX` // sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement // character. func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: return BackslashEscapeUnicode default: return EscapeJSSafe(c, wasEscaped) } }