From a6cd78ec94f76feba180fa75e942bb5cdeae115f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 21:05:24 -0700 Subject: Move string-encoding to an internal/jsonstring package --- compat/json/testcompat_test.go | 12 +--- encode.go | 19 ++++-- encode_escape.go | 10 +-- encode_string.go | 118 -------------------------------- internal/jsonstring/encode_string.go | 128 +++++++++++++++++++++++++++++++++++ reencode.go | 28 ++++---- 6 files changed, 164 insertions(+), 151 deletions(-) delete mode 100644 encode_string.go create mode 100644 internal/jsonstring/encode_string.go diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index 07c75bc..42cbf5c 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -11,8 +11,8 @@ import ( _ "unsafe" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -58,20 +58,14 @@ type encodeState struct { bytes.Buffer } -//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString -func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error - -//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes -func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error - func (es *encodeState) string(str string, _ bool) { - if err := encodeStringFromString(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } func (es *encodeState) stringBytes(str []byte, _ bool) { - if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } diff --git a/encode.go b/encode.go index d39c862..ebb4568 100644 --- a/encode.go +++ b/encode.go @@ -16,6 +16,7 @@ import ( "strings" "unsafe" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -82,7 +83,11 @@ func (enc *Encoder) Encode(obj any) (err error) { if enc.isRoot { enc.w.par.Reset() } - if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil { + escaper := enc.w.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } + if err := encode(enc.w, reflect.ValueOf(obj), escaper, false, 0, map[any]struct{}{}); err != nil { if rwe, ok := err.(*ReEncodeWriteError); ok { err = &EncodeWriteError{ Err: rwe.Err, @@ -192,7 +197,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo Err: err, } } - if err := encodeStringFromBytes(w, escaper, text); err != nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, text); err != nil { return err } default: @@ -295,14 +300,14 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } else { if quote { var buf bytes.Buffer - if err := encodeStringFromString(&buf, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(&buf, escaper, val.String()); err != nil { return err } - if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { return err } } else { - if err := encodeStringFromString(w, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, val.String()); err != nil { return err } } @@ -336,7 +341,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } } empty = false - if err := encodeStringFromString(w, escaper, field.Name); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, field.Name); err != nil { return err } if err := w.WriteByte(':'); err != nil { @@ -389,7 +394,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } if !strings.HasPrefix(kStr, `"`) { k.Reset() - if err := encodeStringFromString(&k, escaper, kStr); err != nil { + if err := jsonstring.EncodeStringFromString(&k, escaper, kStr); err != nil { return err } kStr = k.String() diff --git a/encode_escape.go b/encode_escape.go index ab0d9c1..0054e72 100644 --- a/encode_escape.go +++ b/encode_escape.go @@ -6,6 +6,8 @@ package lowmemjson import ( "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // BackslashEscapeMode identifies one of the three ways that a @@ -17,12 +19,12 @@ import ( // single-character) // // - as a long Unicode `\uXXXX` backslash sequence -type BackslashEscapeMode uint8 +type BackslashEscapeMode = jsonstring.BackslashEscapeMode const ( - BackslashEscapeNone BackslashEscapeMode = iota - BackslashEscapeShort - BackslashEscapeUnicode + BackslashEscapeNone = jsonstring.BackslashEscapeNone + BackslashEscapeShort = jsonstring.BackslashEscapeShort + BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode ) // A BackslashEscaper controls how a ReEncoder emits a character in a diff --git a/encode_string.go b/encode_string.go deleted file mode 100644 index 208aef4..0000000 --- a/encode_string.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "fmt" - "io" - "unicode/utf8" - - "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" -) - -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { - buf := [6]byte{ - '\\', - 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], - } - return w.Write(buf[:]) -} - -func writeStringShortEscape(w io.Writer, c rune) (int, error) { - var b byte - switch c { - case '"', '\\', '/': - b = byte(c) - case '\b': - b = 'b' - case '\f': - b = 'f' - case '\n': - b = 'n' - case '\r': - b = 'r' - case '\t': - b = 't' - default: - panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) - } - buf := [2]byte{'\\', b} - return w.Write(buf[:]) -} - -func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { - if escaper == nil { - escaper = EscapeDefault - } - switch escaper(c, wasEscaped) { - case BackslashEscapeNone: - switch { - case c < 0x0020: // override, gotta escape these - switch c { - case '\b', '\f', '\n', '\r', '\t': // short-escape if possible - return writeStringShortEscape(w, c) - default: - return writeStringUnicodeEscape(w, c) - } - case c == '"' || c == '\\': // override, gotta escape these - return writeStringShortEscape(w, c) - default: // obey - return w.WriteRune(c) - } - case BackslashEscapeShort: - switch c { - case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey - return writeStringShortEscape(w, c) - default: // override, can't short-escape these - return w.WriteRune(c) - } - case BackslashEscapeUnicode: - switch { - case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) - default: // obey - return writeStringUnicodeEscape(w, c) - } - default: - panic("escaper returned an invalid escape mode") - } -} - -func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { - if err := w.WriteByte('"'); err != nil { - return err - } - for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - return err - } - } - if err := w.WriteByte('"'); err != nil { - return err - } - return nil -} - -func encodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { - if err := w.WriteByte('"'); err != nil { - return err - } - for i := 0; i < len(str); { - c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - return err - } - i += size - } - if err := w.WriteByte('"'); err != nil { - return err - } - return nil -} diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go new file mode 100644 index 0000000..f29dc3f --- /dev/null +++ b/internal/jsonstring/encode_string.go @@ -0,0 +1,128 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonstring + +import ( + "fmt" + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +// BackslashEscapeMode is describe in the main lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is describe in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + buf := [6]byte{ + '\\', + 'u', + jsonparse.Hex[(c>>12)&0xf], + jsonparse.Hex[(c>>8)&0xf], + jsonparse.Hex[(c>>4)&0xf], + jsonparse.Hex[(c>>0)&0xf], + } + return w.Write(buf[:]) +} + +func writeStringShortEscape(w io.Writer, c rune) (int, error) { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) + } + buf := [2]byte{'\\', b} + return w.Write(buf[:]) +} + +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { + switch escape { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + return w.WriteRune(c) + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + return w.WriteRune(c) + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + return w.WriteRune(c) + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for _, c := range str { + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} + +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + i += size + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} diff --git a/reencode.go b/reencode.go index d19dc1a..f100275 100644 --- a/reencode.go +++ b/reencode.go @@ -12,6 +12,7 @@ import ( "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -499,34 +500,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { + escaper := enc.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } var err error switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: switch c { - case '"': - err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) - case '\\': - err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) - case '/': - err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + case '"', '\\', '/': + // self case 'b': - err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + c = '\b' case 'f': - err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + c = '\f' case 'n': - err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + c = '\n' case 'r': - err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + c = '\r' case 't': - err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + c = '\t' default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) case jsonparse.RuneTypeStringEscUA: enc.uhex[0], _ = jsonparse.HexToInt(c) case jsonparse.RuneTypeStringEscUB: @@ -540,7 +542,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { rune(enc.uhex[1])<<8 | rune(enc.uhex[2])<<4 | rune(enc.uhex[3])<<0 - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough -- cgit v1.2.3-2-g168b