diff options
Diffstat (limited to 'internal')
-rw-r--r-- | internal/jsonparse/hex.go | 20 | ||||
-rw-r--r-- | internal/jsonparse/parse.go | 28 | ||||
-rw-r--r-- | internal/jsonstring/encode_string.go | 133 |
3 files changed, 148 insertions, 33 deletions
diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go deleted file mode 100644 index 3ed5f01..0000000 --- a/internal/jsonparse/hex.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package jsonparse - -const Hex = "0123456789abcdef" - -func HexToInt(c rune) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 73584d9..2f5c1ab 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -14,6 +14,12 @@ import ( var ErrParserExceededMaxDepth = errors.New("exceeded max depth") +func isHex(c rune) bool { + return ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F') +} + // RuneType is the classification of a rune when parsing JSON input. // A Parser, rather than grouping runes into tokens and classifying // tokens, classifies runes directly. 
@@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUA), nil case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUB), nil case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUC), nil case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil // number ////////////////////////////////////////////////////////////////////////////////// // // Here's a flattened drawing of the syntax diagram from www.json.org : diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go new file mode 100644 index 0000000..1b0c68a --- /dev/null +++ b/internal/jsonstring/encode_string.go @@ -0,0 +1,133 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonstring + +import ( + "fmt" + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" +) + +// BackslashEscapeMode is described in the main 
lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is described in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) error { + const alphabet = "0123456789abcdef" + buf := [6]byte{ + '\\', + 'u', + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], + } + _, err := w.Write(buf[:]) + return err +} + +func writeStringShortEscape(w io.Writer, c rune) error { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) + } + buf := [2]byte{'\\', b} + _, err := w.Write(buf[:]) + return err +} + +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { + switch escape { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + _, err := w.WriteRune(c) + return err + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + _, err := w.WriteRune(c) + return err + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) 
+ _, err := w.WriteRune(c) + return err + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for _, c := range str { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} + +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + i += size + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} |