From a6cd78ec94f76feba180fa75e942bb5cdeae115f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 21:05:24 -0700 Subject: Move string-encoding to an internal/jsonstring package --- internal/jsonstring/encode_string.go | 128 +++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 internal/jsonstring/encode_string.go (limited to 'internal/jsonstring/encode_string.go') diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go new file mode 100644 index 0000000..f29dc3f --- /dev/null +++ b/internal/jsonstring/encode_string.go @@ -0,0 +1,128 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonstring + +import ( + "fmt" + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +// BackslashEscapeMode is describe in the main lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is describe in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + buf := [6]byte{ + '\\', + 'u', + jsonparse.Hex[(c>>12)&0xf], + jsonparse.Hex[(c>>8)&0xf], + jsonparse.Hex[(c>>4)&0xf], + jsonparse.Hex[(c>>0)&0xf], + } + return w.Write(buf[:]) +} + +func writeStringShortEscape(w io.Writer, c rune) (int, error) { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) + } + buf := [2]byte{'\\', b} + return w.Write(buf[:]) +} + +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { + switch escape { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + return w.WriteRune(c) + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + return w.WriteRune(c) + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + return w.WriteRune(c) + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for _, c := range str { + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} + +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + i += size + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} -- cgit v1.2.3-2-g168b From 2b7fff828e29b63ae08a871b4b1e74784fab29e5 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 19:06:46 -0700 Subject: Clean up the hex handling --- internal/jsonstring/encode_string.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'internal/jsonstring/encode_string.go') diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index f29dc3f..a7670c6 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -10,7 +10,6 @@ import ( "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // BackslashEscapeMode is describe in the main lowmemjson package @@ -27,13 +26,14 @@ const ( type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], } return w.Write(buf[:]) } -- cgit v1.2.3-2-g168b From f823342d5b9c2ca376d038471889176ab74acf1b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 15 Feb 2023 15:10:00 -0700 Subject: reencode: Don't bother tracking the number of bytes written --- internal/jsonstring/encode_string.go | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'internal/jsonstring/encode_string.go') diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index a7670c6..1b0c68a 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -25,7 +25,7 @@ const ( // BackslashEscaper is describe in the main lowmemjson package docs. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { +func writeStringUnicodeEscape(w io.Writer, c rune) error { const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', @@ -35,10 +35,11 @@ func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { alphabet[(c>>4)&0xf], alphabet[(c>>0)&0xf], } - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func writeStringShortEscape(w io.Writer, c rune) (int, error) { +func writeStringShortEscape(w io.Writer, c rune) error { var b byte switch c { case '"', '\\', '/': @@ -57,10 +58,11 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) } buf := [2]byte{'\\', b} - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { switch escape { case BackslashEscapeNone: switch { @@ -74,19 +76,22 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (in case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err default: // obey return writeStringUnicodeEscape(w, c) } @@ -100,7 +105,7 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st return err } for _, c := range str { - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } } @@ -116,7 +121,7 @@ func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []b } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } i += size -- cgit v1.2.3-2-g168b