summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 17:20:41 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-02-18 22:45:54 -0700
commit2eb60b8be25a4b0fe3f1c5d5ca302e7e68190bad (patch)
tree0a4001f1e37d8e3a29fa3f569fa7f850c0d9f766
parent1a5b0561f53441d8a259a5096281699b5af16a6c (diff)
compat/json: Don't do actual JSON parsing in HTMLEscape
-rw-r--r--ReleaseNotes.md5
-rw-r--r--compat/json/compat.go21
-rw-r--r--compat/json/compat_test.go21
-rw-r--r--internal/jsonstring/encode_string.go6
4 files changed, 49 insertions, 4 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index ae147b1..c949fd6 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -24,6 +24,11 @@
+ compat/json.Compact, compat/json.Indent: No longer compact
floating-point numbers, as `encoding/json` doesn't.
+ + compat/json.HTMLEscape: Just look for problematic UTF-8 runes,
+ don't actually parse as JSON. This is consistent with the
+ function's lack of an `error` return value, and with the
+ behavior of `encoding/json`.
+
- Unicode:
+ Feature: Encoder, ReEncoder: Add an `InvalidUTF8`
diff --git a/compat/json/compat.go b/compat/json/compat.go
index d326514..edc6908 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -11,10 +11,13 @@ import (
"bytes"
"encoding/json"
"errors"
+ "fmt"
"io"
"strconv"
+ "unicode/utf8"
"git.lukeshu.com/go/lowmemjson"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
//nolint:stylecheck // ST1021 False positive; these aren't comments on individual types.
@@ -144,7 +147,23 @@ func convertReEncodeError(err error) error {
}
func HTMLEscape(dst *bytes.Buffer, src []byte) {
- _, _ = lowmemjson.NewReEncoder(dst, lowmemjson.ReEncoderConfig{}).Write(src)
+ for n := 0; n < len(src); {
+ c, size := utf8.DecodeRune(src[n:])
+ if c == utf8.RuneError && size == 1 {
+ dst.WriteByte(src[n])
+ } else {
+ mode := lowmemjson.EscapeHTMLSafe(c, lowmemjson.BackslashEscapeNone)
+ switch mode {
+ case lowmemjson.BackslashEscapeNone:
+ dst.WriteRune(c)
+ case lowmemjson.BackslashEscapeUnicode:
+ _ = jsonstring.WriteStringUnicodeEscape(dst, c)
+ default:
+ panic(fmt.Errorf("lowmemjson.EscapeHTMLSafe returned an unexpected escape mode=%d", mode))
+ }
+ }
+ n += size
+ }
}
func reencode(dst io.Writer, src []byte, cfg lowmemjson.ReEncoderConfig) error {
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index 128bd1b..0c14a60 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -11,6 +11,27 @@ import (
"github.com/stretchr/testify/assert"
)
+func TestCompatHTMLEscape(t *testing.T) {
+ t.Parallel()
+ type testcase struct {
+ In string
+ Out string
+ }
+ testcases := map[string]testcase{
+ "invalid": {In: `x`, Out: `x`},
+ }
+ for tcName, tc := range testcases {
+ tc := tc
+ t.Run(tcName, func(t *testing.T) {
+ t.Parallel()
+ t.Logf("in=%q", tc.In)
+ var dst bytes.Buffer
+ HTMLEscape(&dst, []byte(tc.In))
+ assert.Equal(t, tc.Out, dst.String())
+ })
+ }
+}
+
func TestCompatValid(t *testing.T) {
t.Parallel()
type testcase struct {
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
index 76bbb38..2488cb2 100644
--- a/internal/jsonstring/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -38,7 +38,7 @@ const (
// BackslashEscaper is described in the main lowmemjson package docs.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
-func writeStringUnicodeEscape(w io.Writer, c rune) error {
+func WriteStringUnicodeEscape(w io.Writer, c rune) error {
const alphabet = "0123456789abcdef"
buf := [6]byte{
'\\',
@@ -84,7 +84,7 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err
case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
return writeStringShortEscape(w, c)
default:
- return writeStringUnicodeEscape(w, c)
+ return WriteStringUnicodeEscape(w, c)
}
case c == '"' || c == '\\': // override, gotta escape these
return writeStringShortEscape(w, c)
@@ -106,7 +106,7 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err
_, err := w.WriteRune(c)
return err
default: // obey
- return writeStringUnicodeEscape(w, c)
+ return WriteStringUnicodeEscape(w, c)
}
case BackslashEscapeRawByte:
switch {