summaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 21:05:24 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 21:16:32 -0700
commita6cd78ec94f76feba180fa75e942bb5cdeae115f (patch)
treee0b87c21a23a00c6c1245c269fcb157add1cf19b /internal
parentc904fc3e8605ec95c0fac92654d773e8456bf130 (diff)
Move string-encoding to an internal/jsonstring package
Diffstat (limited to 'internal')
-rw-r--r--internal/jsonstring/encode_string.go128
1 files changed, 128 insertions, 0 deletions
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
new file mode 100644
index 0000000..f29dc3f
--- /dev/null
+++ b/internal/jsonstring/encode_string.go
@@ -0,0 +1,128 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package jsonstring
+
+import (
+ "fmt"
+ "io"
+ "unicode/utf8"
+
+ "git.lukeshu.com/go/lowmemjson/internal/fastio"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+)
+
+// BackslashEscapeMode is described in the main lowmemjson package
+// docs.
+//
+// Within this package it is the value a BackslashEscaper returns to
+// request how WriteStringChar should write a single rune.
+type BackslashEscapeMode uint8
+
+const (
+ // BackslashEscapeNone requests that the rune be written
+ // without a backslash escape.  WriteStringChar still escapes
+ // runes below U+0020, `"`, and `\`.
+ BackslashEscapeNone BackslashEscapeMode = iota
+ // BackslashEscapeShort requests a two-character escape such
+ // as `\n`.  WriteStringChar honors this only for runes that
+ // have such an escape.
+ BackslashEscapeShort
+ // BackslashEscapeUnicode requests a `\uXXXX` escape.
+ // WriteStringChar honors this only for runes up to U+FFFF.
+ BackslashEscapeUnicode
+)
+
+// BackslashEscaper is described in the main lowmemjson package docs.
+//
+// EncodeStringFromString and EncodeStringFromBytes call it once per
+// decoded rune, always passing BackslashEscapeNone as the second
+// argument, and hand the returned mode to WriteStringChar.
+type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
+
+func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
+ buf := [6]byte{
+ '\\',
+ 'u',
+ jsonparse.Hex[(c>>12)&0xf],
+ jsonparse.Hex[(c>>8)&0xf],
+ jsonparse.Hex[(c>>4)&0xf],
+ jsonparse.Hex[(c>>0)&0xf],
+ }
+ return w.Write(buf[:])
+}
+
+func writeStringShortEscape(w io.Writer, c rune) (int, error) {
+ var b byte
+ switch c {
+ case '"', '\\', '/':
+ b = byte(c)
+ case '\b':
+ b = 'b'
+ case '\f':
+ b = 'f'
+ case '\n':
+ b = 'n'
+ case '\r':
+ b = 'r'
+ case '\t':
+ b = 't'
+ default:
+ panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c))
+ }
+ buf := [2]byte{'\\', b}
+ return w.Write(buf[:])
+}
+
+// WriteStringChar writes the rune c to w as part of the body of a
+// JSON string, using the requested escape mode where that yields
+// valid output and overriding it where it does not:
+//
+//   - BackslashEscapeNone: c is written verbatim, except that runes
+//     below U+0020, `"`, and `\` are always escaped (with a short
+//     escape if one exists, otherwise as `\u00XX`).
+//   - BackslashEscapeShort: c is written as a two-character escape
+//     if it has one; any other rune is handled exactly as for
+//     BackslashEscapeNone, so that a control character without a
+//     short escape is never emitted raw.
+//   - BackslashEscapeUnicode: c is written as `\uXXXX` if it is at
+//     most U+FFFF; larger runes are written verbatim.
+//
+// The return values are those of the single underlying write.
+//
+// It panics if escape is not one of the three modes above.
+func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) {
+	switch escape {
+	case BackslashEscapeNone:
+		switch {
+		case c < 0x0020: // override, gotta escape these
+			switch c {
+			case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
+				return writeStringShortEscape(w, c)
+			default:
+				return writeStringUnicodeEscape(w, c)
+			}
+		case c == '"' || c == '\\': // override, gotta escape these
+			return writeStringShortEscape(w, c)
+		default: // obey
+			return w.WriteRune(c)
+		}
+	case BackslashEscapeShort:
+		switch c {
+		case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
+			return writeStringShortEscape(w, c)
+		default: // override, can't short-escape these
+			// Fall back to the "no escape requested" rules rather
+			// than writing c raw: a control character that has no
+			// short escape must still be escaped to be valid JSON.
+			return WriteStringChar(w, c, BackslashEscapeNone)
+		}
+	case BackslashEscapeUnicode:
+		switch {
+		case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
+			return w.WriteRune(c)
+		default: // obey
+			return writeStringUnicodeEscape(w, c)
+		}
+	default:
+		panic("escaper returned an invalid escape mode")
+	}
+}
+
+// EncodeStringFromString writes str to w as a double-quoted JSON
+// string.  Each rune of str is passed to escaper (together with
+// BackslashEscapeNone) and then written with WriteStringChar using
+// the mode that escaper returned.
+//
+// The first write error encountered is returned immediately, which
+// may leave a partially written string in w.
+func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
+	if err := w.WriteByte('"'); err != nil {
+		return err
+	}
+	for _, r := range str {
+		mode := escaper(r, BackslashEscapeNone)
+		if _, err := WriteStringChar(w, r, mode); err != nil {
+			return err
+		}
+	}
+	// The closing quote is the last write, so its result is the
+	// function's result.
+	return w.WriteByte('"')
+}
+
+// EncodeStringFromBytes writes str to w as a double-quoted JSON
+// string.  The bytes are decoded one rune at a time with
+// utf8.DecodeRune; each rune is passed to escaper (together with
+// BackslashEscapeNone) and then written with WriteStringChar using
+// the mode that escaper returned.
+//
+// The first write error encountered is returned immediately, which
+// may leave a partially written string in w.
+func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
+	if err := w.WriteByte('"'); err != nil {
+		return err
+	}
+	// Consume the input from the front; reslicing only changes the
+	// local view and never touches the caller's bytes.
+	for rest := str; len(rest) > 0; {
+		r, n := utf8.DecodeRune(rest)
+		rest = rest[n:]
+		mode := escaper(r, BackslashEscapeNone)
+		if _, err := WriteStringChar(w, r, mode); err != nil {
+			return err
+		}
+	}
+	// The closing quote is the last write, so its result is the
+	// function's result.
+	return w.WriteByte('"')
+}