// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later
package lowmemjson
import (
"fmt"
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// BackslashEscapeMode identifies one of the three ways that a
// character may be represented in a JSON string:
//
//   - literally (no backslash escaping)
//
//   - as a short "well-known" `\X` backslash sequence (where `X` is a
//     single character)
//
//   - as a long Unicode `\uXXXX` backslash sequence
//
// It is an alias for the type of the same name in the internal
// jsonstring package, so values of the two types are interchangeable.
type BackslashEscapeMode = jsonstring.BackslashEscapeMode
// The BackslashEscapeMode values, re-exported from the internal
// jsonstring package.
const (
// BackslashEscapeNone: the character is written literally, with no
// backslash escaping.
BackslashEscapeNone = jsonstring.BackslashEscapeNone
// BackslashEscapeShort: the character is written as a short
// "well-known" `\X` backslash sequence.
BackslashEscapeShort = jsonstring.BackslashEscapeShort
// BackslashEscapeUnicode: the character is written as a long Unicode
// `\uXXXX` backslash sequence.
BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode
)
// hexToInt returns the numeric value (0 through 15) of the ASCII
// hexadecimal digit c.  Both lowercase and uppercase letters are
// accepted.
//
// It panics if c is not a hexadecimal digit; callers are expected to
// have validated the input already.
func hexToInt(c byte) rune {
	if c >= '0' && c <= '9' {
		return rune(c - '0')
	}
	if c >= 'a' && c <= 'f' {
		return rune(c-'a') + 10
	}
	if c >= 'A' && c <= 'F' {
		return rune(c-'A') + 10
	}
	panic(fmt.Errorf("should not happen: invalid hex char: %q", c))
}
// hexToRune combines four ASCII hexadecimal digits into a single
// 16-bit value, with a as the most-significant nibble and d as the
// least-significant.  (Presumably the four digits of a `\uXXXX`
// escape; confirm against callers.)
//
// It panics (via hexToInt) if any of the four bytes is not a
// hexadecimal digit.
func hexToRune(a, b, c, d byte) rune {
	var val rune
	// Fold left-to-right: each digit shifts the accumulated value up
	// by one nibble.
	for _, digit := range [...]byte{a, b, c, d} {
		val = val<<4 | hexToInt(digit)
	}
	return val
}
// A BackslashEscaper controls how a ReEncoder emits a character in a
// JSON string.  The `rune` argument is the character being
// considered, and the `BackslashEscapeMode` argument is how it was
// originally encoded in the input.  The returned BackslashEscapeMode
// is how the character is to be emitted.
//
// The ReEncoder will panic if a BackslashEscaper returns an unknown
// BackslashEscapeMode.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
// EscapePreserve is a BackslashEscaper that leaves every character
// escaped exactly as it was in the input: it ignores the character
// itself and returns wasEscaped unchanged.
func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	return wasEscaped
}
// EscapeJSSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in JS; it otherwise preserves the
// original input escaping.
//
// JSON is notionally a JS subset, but that's not actually true; so
// more conservative backslash-escaping is necessary to safely embed
// it in JS.  http://timelessrepo.com/json-isnt-a-javascript-subset
//
// Concretely, U+2028 and U+2029 are always emitted as `\uXXXX`
// sequences; every other character keeps wasEscaped.
func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\u2028' || c == '\u2029' {
		return BackslashEscapeUnicode
	}
	return wasEscaped
}
// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in HTML; it otherwise preserves the
// original input escaping.
//
// Concretely, `&`, `<`, and `>` are always emitted as `\uXXXX`
// sequences; every other character is handled as by EscapeJSSafe.
func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '&' || c == '<' || c == '>' {
		return BackslashEscapeUnicode
	}
	return EscapeJSSafe(c, wasEscaped)
}
// EscapeDefault is a BackslashEscaper that mimics the default
// behavior of encoding/json.
//
// It behaves like EscapeHTMLSafe, except that `\b`, `\f`, and the
// `\uFFFD` Unicode replacement character are always emitted as long
// Unicode `\uXXXX` sequences.
//
// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
// specified.
func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\b' || c == '\f' || c == utf8.RuneError {
		return BackslashEscapeUnicode
	}
	return EscapeHTMLSafe(c, wasEscaped)
}
// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
// default behavior of an encoding/json.Encoder that has had
// SetEscapeHTML(false) called on it.
//
// It behaves like EscapeJSSafe, except that `\b`, `\f`, and the
// `\uFFFD` Unicode replacement character are always emitted as long
// Unicode `\uXXXX` sequences.
func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\b' || c == '\f' || c == utf8.RuneError {
		return BackslashEscapeUnicode
	}
	return EscapeJSSafe(c, wasEscaped)
}
|