summary refs log tree commit diff
path: root/internal
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-16 22:30:54 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-16 22:30:54 -0700
commit debef01cc500fb9368e1d6d0206a32ca358a8c98 (patch)
tree f021ae7890922e10a1aa119dcdbd7dd2a587f09e /internal
parent d7414035894f378c9e1d48b04a767f61b082186a (diff)
parent f823342d5b9c2ca376d038471889176ab74acf1b (diff)
Merge branch 'lukeshu/misc'
Diffstat (limited to 'internal')
-rw-r--r-- internal/jsonparse/hex.go 20
-rw-r--r-- internal/jsonparse/parse.go 28
-rw-r--r-- internal/jsonstring/encode_string.go 133
3 files changed, 148 insertions, 33 deletions
diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go
deleted file mode 100644
index 3ed5f01..0000000
--- a/internal/jsonparse/hex.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package jsonparse
-
-const Hex = "0123456789abcdef"
-
-func HexToInt(c rune) (byte, bool) {
- switch {
- case '0' <= c && c <= '9':
- return byte(c) - '0', true
- case 'a' <= c && c <= 'f':
- return byte(c) - 'a' + 10, true
- case 'A' <= c && c <= 'F':
- return byte(c) - 'A' + 10, true
- default:
- return 0, false
- }
-}
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index 73584d9..2f5c1ab 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -14,6 +14,12 @@ import (
var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
+func isHex(c rune) bool { // reports whether c is an ASCII hexadecimal digit: 0-9, a-f, or A-F
+ return ('0' <= c && c <= '9') ||
+ ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F')
+}
+
// RuneType is the classification of a rune when parsing JSON input.
// A Parser, rather than grouping runes into tokens and classifying
// tokens, classifies runes directly.
@@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
}
case RuneTypeStringEscU:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUA), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUA), nil
case RuneTypeStringEscUA:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUB), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUB), nil
case RuneTypeStringEscUB:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUC), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUC), nil
case RuneTypeStringEscUC:
- if _, ok := HexToInt(c); ok {
- par.replaceState(RuneTypeStringBeg)
- return RuneTypeStringEscUD, nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEscUD, nil
// number //////////////////////////////////////////////////////////////////////////////////
//
// Here's a flattened drawing of the syntax diagram from www.json.org :
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
new file mode 100644
index 0000000..1b0c68a
--- /dev/null
+++ b/internal/jsonstring/encode_string.go
@@ -0,0 +1,133 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package jsonstring
+
+import (
+ "fmt"
+ "io"
+ "unicode/utf8"
+
+ "git.lukeshu.com/go/lowmemjson/internal/fastio"
+)
+
+// BackslashEscapeMode is described in the main lowmemjson package
+// docs.
+type BackslashEscapeMode uint8
+
+const (
+ BackslashEscapeNone BackslashEscapeMode = iota // write the rune literally unless JSON requires it to be escaped
+ BackslashEscapeShort // use a two-character escape such as \n when the rune has one
+ BackslashEscapeUnicode // use a \uXXXX escape when the rune is no larger than 0xFFFF
+)
+
+// BackslashEscaper is described in the main lowmemjson package docs.
+type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode // called once per rune by the EncodeStringFrom* functions; the returned mode is passed to WriteStringChar
+
+func writeStringUnicodeEscape(w io.Writer, c rune) error { // writes c to w as a six-byte \uXXXX escape with lowercase hex digits
+ const alphabet = "0123456789abcdef"
+ buf := [6]byte{
+ '\\',
+ 'u',
+ alphabet[(c>>12)&0xf], // top nibble of the low 16 bits; any higher bits of c are not represented
+ alphabet[(c>>8)&0xf],
+ alphabet[(c>>4)&0xf],
+ alphabet[(c>>0)&0xf],
+ }
+ _, err := w.Write(buf[:]) // the byte count is discarded; only the error is returned
+ return err
+}
+
+func writeStringShortEscape(w io.Writer, c rune) error { // writes the two-byte backslash escape for c to w; panics if c has no such escape
+ var b byte // the byte that follows the backslash
+ switch c {
+ case '"', '\\', '/':
+ b = byte(c) // these three escape to themselves
+ case '\b':
+ b = 'b'
+ case '\f':
+ b = 'f'
+ case '\n':
+ b = 'n'
+ case '\r':
+ b = 'r'
+ case '\t':
+ b = 't'
+ default: // any other rune is a bug in the caller, hence the panic rather than an error
+ panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c))
+ }
+ buf := [2]byte{'\\', b}
+ _, err := w.Write(buf[:]) // the byte count is discarded; only the error is returned
+ return err
+}
+
+func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { // writes c to w, obeying the requested escape mode where possible and overriding it where it cannot be honored
+ switch escape {
+ case BackslashEscapeNone:
+ switch {
+ case c < 0x0020: // override: runes below 0x20 are always escaped, even though no escape was requested
+ switch c {
+ case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
+ return writeStringShortEscape(w, c)
+ default: // no short form for this rune; fall back to \uXXXX
+ return writeStringUnicodeEscape(w, c)
+ }
+ case c == '"' || c == '\\': // override, gotta escape these
+ return writeStringShortEscape(w, c)
+ default: // obey: write the rune as-is
+ _, err := w.WriteRune(c)
+ return err
+ }
+ case BackslashEscapeShort:
+ switch c {
+ case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
+ return writeStringShortEscape(w, c)
+ default: // override: this rune has no short escape, so write it as-is
+ _, err := w.WriteRune(c)
+ return err
+ }
+ case BackslashEscapeUnicode:
+ switch {
+ case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
+ _, err := w.WriteRune(c)
+ return err
+ default: // obey: the rune fits in four hex digits
+ return writeStringUnicodeEscape(w, c)
+ }
+ default: // escape is not one of the three declared modes
+ panic("escaper returned an invalid escape mode")
+ }
+}
+
+func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { // writes str to w as a double-quoted string, escaping each rune as escaper directs; stops at the first write error
+ if err := w.WriteByte('"'); err != nil {
+ return err
+ }
+ for _, c := range str { // range walks str one rune at a time
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { // escaper is always passed BackslashEscapeNone; its result selects the mode
+ return err
+ }
+ }
+ if err := w.WriteByte('"'); err != nil {
+ return err
+ }
+ return nil
+}
+
+func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { // same as EncodeStringFromString, but reads the text from a byte slice
+ if err := w.WriteByte('"'); err != nil {
+ return err
+ }
+ for i := 0; i < len(str); { // no post statement: i advances by the decoded width below
+ c, size := utf8.DecodeRune(str[i:]) // next rune and the number of bytes it occupied
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { // escaper is always passed BackslashEscapeNone; its result selects the mode
+ return err
+ }
+ i += size
+ }
+ if err := w.WriteByte('"'); err != nil {
+ return err
+ }
+ return nil
+}