diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-16 22:30:54 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-16 22:30:54 -0700 |
commit | debef01cc500fb9368e1d6d0206a32ca358a8c98 (patch) | |
tree | f021ae7890922e10a1aa119dcdbd7dd2a587f09e /internal/jsonstring | |
parent | d7414035894f378c9e1d48b04a767f61b082186a (diff) | |
parent | f823342d5b9c2ca376d038471889176ab74acf1b (diff) |
Merge branch 'lukeshu/misc'
Diffstat (limited to 'internal/jsonstring')
-rw-r--r-- | internal/jsonstring/encode_string.go | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go new file mode 100644 index 0000000..1b0c68a --- /dev/null +++ b/internal/jsonstring/encode_string.go @@ -0,0 +1,133 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonstring + +import ( + "fmt" + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" +) + +// BackslashEscapeMode is describe in the main lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is describe in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) error { + const alphabet = "0123456789abcdef" + buf := [6]byte{ + '\\', + 'u', + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], + } + _, err := w.Write(buf[:]) + return err +} + +func writeStringShortEscape(w io.Writer, c rune) error { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) + } + buf := [2]byte{'\\', b} + _, err := w.Write(buf[:]) + return err +} + +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { + switch escape { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + _, err := w.WriteRune(c) + return err + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + _, err := w.WriteRune(c) + return err + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + _, err := w.WriteRune(c) + return err + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for _, c := range str { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} + +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + i += size + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} |