From c904fc3e8605ec95c0fac92654d773e8456bf130 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 18:39:25 -0700 Subject: mv compat/json/{compat,testcompat}_test.go --- compat/json/compat_test.go | 86 ------------------------------------------ compat/json/testcompat_test.go | 86 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 86 deletions(-) delete mode 100644 compat/json/compat_test.go create mode 100644 compat/json/testcompat_test.go diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go deleted file mode 100644 index 07c75bc..0000000 --- a/compat/json/compat_test.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package json - -import ( - "bytes" - "encoding/json" - "io" - _ "unsafe" - - "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" - "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" -) - -var ( - parseTag = jsonstruct.ParseTag - clearCache = jsonstruct.ClearCache - cachedTypeFields = jsonstruct.IndexStruct -) - -type scanner = lowmemjson.ReEncoderConfig - -func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { - return reencode(io.Discard, in, *scan) -} - -func isValidNumber(s string) bool { - var parser jsonparse.Parser - for _, r := range s { - if t, _ := parser.HandleRune(r); !t.IsNumber() { - return false - } - } - if t, _ := parser.HandleEOF(); t == jsonparse.RuneTypeError { - return false - } - return true -} - -const ( - startDetectingCyclesAfter = 1000 -) - -func isSpace(c byte) bool { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return true - default: - return false - } -} - -type encodeState struct { - bytes.Buffer -} - -//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString -func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error - -//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes -func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error - -func (es *encodeState) string(str string, _ bool) { - if err := encodeStringFromString(&es.Buffer, nil, str); err != nil { - panic(err) - } -} - -func (es *encodeState) stringBytes(str []byte, _ bool) { - if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil { - panic(err) - } -} - -type ( - Token = json.Token - Delim = json.Delim -) - -func (dec *Decoder) Token() (Token, error) { - panic("not implemented") -} diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go new file mode 100644 index 0000000..07c75bc --- /dev/null +++ b/compat/json/testcompat_test.go @@ -0,0 +1,86 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package json + +import ( + "bytes" + "encoding/json" + "io" + _ "unsafe" + + "git.lukeshu.com/go/lowmemjson" + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" +) + +var ( + parseTag = jsonstruct.ParseTag + clearCache = jsonstruct.ClearCache + cachedTypeFields = jsonstruct.IndexStruct +) + +type scanner = lowmemjson.ReEncoderConfig + +func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { + return reencode(io.Discard, in, *scan) +} + +func isValidNumber(s string) bool { + var parser jsonparse.Parser + for _, r := range s { + if t, _ := parser.HandleRune(r); !t.IsNumber() { + return false + } + } + if t, _ := parser.HandleEOF(); t == jsonparse.RuneTypeError { + return false + } + return true +} + +const ( + startDetectingCyclesAfter = 1000 +) + +func isSpace(c byte) bool { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return true + default: + return false + } +} + +type encodeState struct { + bytes.Buffer +} + +//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString +func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error + +//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes +func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error + +func (es *encodeState) string(str string, _ bool) { + if err := encodeStringFromString(&es.Buffer, nil, str); err != nil { + panic(err) + } +} + +func (es *encodeState) stringBytes(str []byte, _ bool) { + if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil { + panic(err) + } +} + +type ( + Token = json.Token + Delim = json.Delim +) + +func (dec *Decoder) Token() (Token, error) { + panic("not implemented") +} -- cgit v1.1-4-g5e80 From a6cd78ec94f76feba180fa75e942bb5cdeae115f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 21:05:24 -0700 Subject: Move string-encoding to an internal/jsonstring package --- compat/json/testcompat_test.go | 12 +--- encode.go | 19 ++++-- encode_escape.go | 10 +-- encode_string.go | 118 -------------------------------- internal/jsonstring/encode_string.go | 128 +++++++++++++++++++++++++++++++++++ reencode.go | 28 ++++---- 6 files changed, 164 insertions(+), 151 deletions(-) delete mode 100644 encode_string.go create mode 100644 internal/jsonstring/encode_string.go diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go index 07c75bc..42cbf5c 100644 --- a/compat/json/testcompat_test.go +++ b/compat/json/testcompat_test.go @@ -11,8 +11,8 @@ import ( _ "unsafe" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -58,20 +58,14 @@ type encodeState struct { bytes.Buffer } -//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString -func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error - -//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes -func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error - func (es *encodeState) string(str string, _ bool) { - if err := encodeStringFromString(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } func (es *encodeState) stringBytes(str []byte, _ bool) { - if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } diff --git a/encode.go b/encode.go index d39c862..ebb4568 100644 --- a/encode.go +++ b/encode.go @@ -16,6 +16,7 @@ import ( "strings" "unsafe" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -82,7 +83,11 @@ func (enc *Encoder) Encode(obj any) (err error) { if enc.isRoot { enc.w.par.Reset() } - if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil { + escaper := enc.w.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } + if err := encode(enc.w, reflect.ValueOf(obj), escaper, false, 0, map[any]struct{}{}); err != nil { if rwe, ok := err.(*ReEncodeWriteError); ok { err = &EncodeWriteError{ Err: rwe.Err, @@ -192,7 +197,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo Err: err, } } - if err := encodeStringFromBytes(w, escaper, text); err != nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, text); err != nil { return err } default: @@ -295,14 +300,14 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } else { if quote { var buf bytes.Buffer - if err := encodeStringFromString(&buf, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(&buf, escaper, val.String()); err != nil { return err } - if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { return err } } else { - if err := encodeStringFromString(w, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, val.String()); err != nil { return err } } @@ -336,7 +341,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } } empty = false - if err := encodeStringFromString(w, escaper, field.Name); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, field.Name); err != nil { return err } if err := w.WriteByte(':'); err != nil { @@ -389,7 +394,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } if !strings.HasPrefix(kStr, `"`) { k.Reset() - if err := encodeStringFromString(&k, escaper, kStr); err != nil { + if err := jsonstring.EncodeStringFromString(&k, escaper, kStr); err != nil { return err } kStr = k.String() diff --git a/encode_escape.go b/encode_escape.go index ab0d9c1..0054e72 100644 --- a/encode_escape.go +++ b/encode_escape.go @@ -6,6 +6,8 @@ package lowmemjson import ( "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // BackslashEscapeMode identifies one of the three ways that a @@ -17,12 +19,12 @@ import ( // single-character) // // - as a long Unicode `\uXXXX` backslash sequence -type BackslashEscapeMode uint8 +type BackslashEscapeMode = jsonstring.BackslashEscapeMode const ( - BackslashEscapeNone BackslashEscapeMode = iota - BackslashEscapeShort - BackslashEscapeUnicode + BackslashEscapeNone = jsonstring.BackslashEscapeNone + BackslashEscapeShort = jsonstring.BackslashEscapeShort + BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode ) // A BackslashEscaper controls how a ReEncoder emits a character in a diff --git a/encode_string.go b/encode_string.go deleted file mode 100644 index 208aef4..0000000 --- a/encode_string.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "fmt" - "io" - "unicode/utf8" - - "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" -) - -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { - buf := [6]byte{ - '\\', - 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], - } - return w.Write(buf[:]) -} - -func writeStringShortEscape(w io.Writer, c rune) (int, error) { - var b byte - switch c { - case '"', '\\', '/': - b = byte(c) - case '\b': - b = 'b' - case '\f': - b = 'f' - case '\n': - b = 'n' - case '\r': - b = 'r' - case '\t': - b = 't' - default: - panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) - } - buf := [2]byte{'\\', b} - return w.Write(buf[:]) -} - -func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { - if escaper == nil { - escaper = EscapeDefault - } - switch escaper(c, wasEscaped) { - case BackslashEscapeNone: - switch { - case c < 0x0020: // override, gotta escape these - switch c { - case '\b', '\f', '\n', '\r', '\t': // short-escape if possible - return writeStringShortEscape(w, c) - default: - return writeStringUnicodeEscape(w, c) - } - case c == '"' || c == '\\': // override, gotta escape these - return writeStringShortEscape(w, c) - default: // obey - return w.WriteRune(c) - } - case BackslashEscapeShort: - switch c { - case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey - return writeStringShortEscape(w, c) - default: // override, can't short-escape these - return w.WriteRune(c) - } - case BackslashEscapeUnicode: - switch { - case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) - default: // obey - return writeStringUnicodeEscape(w, c) - } - default: - panic("escaper returned an invalid escape mode") - } -} - -func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { - if err := w.WriteByte('"'); err != nil { - return err - } - for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - return err - } - } - if err := w.WriteByte('"'); err != nil { - return err - } - return nil -} - -func encodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { - if err := w.WriteByte('"'); err != nil { - return err - } - for i := 0; i < len(str); { - c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - return err - } - i += size - } - if err := w.WriteByte('"'); err != nil { - return err - } - return nil -} diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go new file mode 100644 index 0000000..f29dc3f --- /dev/null +++ b/internal/jsonstring/encode_string.go @@ -0,0 +1,128 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonstring + +import ( + "fmt" + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +// BackslashEscapeMode is describe in the main lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is describe in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + buf := [6]byte{ + '\\', + 'u', + jsonparse.Hex[(c>>12)&0xf], + jsonparse.Hex[(c>>8)&0xf], + jsonparse.Hex[(c>>4)&0xf], + jsonparse.Hex[(c>>0)&0xf], + } + return w.Write(buf[:]) +} + +func writeStringShortEscape(w io.Writer, c rune) (int, error) { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) + } + buf := [2]byte{'\\', b} + return w.Write(buf[:]) +} + +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { + switch escape { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + return w.WriteRune(c) + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + return w.WriteRune(c) + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + return w.WriteRune(c) + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for _, c := range str { + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} + +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + return err + } + i += size + } + if err := w.WriteByte('"'); err != nil { + return err + } + return nil +} diff --git a/reencode.go b/reencode.go index d19dc1a..f100275 100644 --- a/reencode.go +++ b/reencode.go @@ -12,6 +12,7 @@ import ( "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -499,34 +500,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { + escaper := enc.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } var err error switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: switch c { - case '"': - err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) - case '\\': - err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) - case '/': - err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + case '"', '\\', '/': + // self case 'b': - err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + c = '\b' case 'f': - err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + c = '\f' case 'n': - err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + c = '\n' case 'r': - err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + c = '\r' case 't': - err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + c = '\t' default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) case jsonparse.RuneTypeStringEscUA: enc.uhex[0], _ = jsonparse.HexToInt(c) case jsonparse.RuneTypeStringEscUB: @@ -540,7 +542,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { rune(enc.uhex[1])<<8 | rune(enc.uhex[2])<<4 | rune(enc.uhex[3])<<0 - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough -- cgit v1.1-4-g5e80 From 851452243879498117cd7e71ac12856af44657cb Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 11:44:36 -0700 Subject: reencode: Don't have the ReEncoderConfig member be public --- ReleaseNotes.md | 24 ++++++++++++++---------- compat/json/compat.go | 43 +++++++++++++++++++++++++++---------------- encode.go | 2 +- reencode.go | 38 +++++++++++++++++++------------------- 4 files changed, 61 insertions(+), 46 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index e00bf10..bee16c4 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -4,16 +4,20 @@ User-facing changes: - - `Number` and `RawMessage` type aliases are now available, so that - a user of lowmemjson's native APIs does not need to import - `encoding/json` or compat/json in order to use them. - - - Encoder, ReEncoder: If there was an error writing to the output - stream, it may have returned a `*ReEncodeSyntaxError` even though - it's not a syntax issue, or may have returned the underlying - error without wrapping it. If there is an error writing to the - output, Encoder and ReEncoder now return `*EncodeWriteError` and - `*ReEncodeWriteError` respectively. + - Change: ReEncoder: The ReEncoderConfig struct member is no longer + public. + + - Feature: `Number` and `RawMessage` type aliases are now + available, so that a user of lowmemjson's native APIs does not + need to import `encoding/json` or compat/json in order to use + them. + + - Bigfix: Encoder, ReEncoder: If there was an error writing to the + output stream, it may have returned a `*ReEncodeSyntaxError` even + though it's not a syntax issue, or may have returned the + underlying error without wrapping it. If there is an error + writing to the output, Encoder and ReEncoder now return + `*EncodeWriteError` and `*ReEncodeWriteError` respectively. # v0.3.5 (2023-02-10) diff --git a/compat/json/compat.go b/compat/json/compat.go index 3678135..c96470d 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -72,8 +72,11 @@ func Marshal(v any) ([]byte, error) { } type Encoder struct { - out io.Writer - buf bytes.Buffer + out io.Writer + buf bytes.Buffer + + cfg lowmemjson.ReEncoderConfig + encoder *lowmemjson.Encoder formatter *lowmemjson.ReEncoder } @@ -81,17 +84,23 @@ type Encoder struct { func NewEncoder(w io.Writer) *Encoder { ret := &Encoder{ out: w, - } - ret.formatter = lowmemjson.NewReEncoder(&ret.buf, lowmemjson.ReEncoderConfig{ - AllowMultipleValues: true, - Compact: true, - ForceTrailingNewlines: true, - }) - ret.encoder = lowmemjson.NewEncoder(ret.formatter) + cfg: lowmemjson.ReEncoderConfig{ + AllowMultipleValues: true, + + Compact: true, + ForceTrailingNewlines: true, + }, + } + ret.refreshConfig() return ret } +func (enc *Encoder) refreshConfig() { + enc.formatter = lowmemjson.NewReEncoder(&enc.buf, enc.cfg) + enc.encoder = lowmemjson.NewEncoder(enc.formatter) +} + func (enc *Encoder) Encode(v any) error { if err := convertEncodeError(enc.encoder.Encode(v)); err != nil { enc.buf.Reset() @@ -104,17 +113,19 @@ func (enc *Encoder) Encode(v any) error { } func (enc *Encoder) SetEscapeHTML(on bool) { - var escaper lowmemjson.BackslashEscaper - if !on { - escaper = lowmemjson.EscapeDefaultNonHTMLSafe + if on { + enc.cfg.BackslashEscape = lowmemjson.EscapeDefault + } else { + enc.cfg.BackslashEscape = lowmemjson.EscapeDefaultNonHTMLSafe } - enc.formatter.BackslashEscape = escaper + enc.refreshConfig() } func (enc *Encoder) SetIndent(prefix, indent string) { - enc.formatter.Compact = prefix == "" && indent == "" - enc.formatter.Prefix = prefix - enc.formatter.Indent = indent + enc.cfg.Compact = prefix == "" && indent == "" + enc.cfg.Prefix = prefix + enc.cfg.Indent = indent + enc.refreshConfig() } // ReEncode wrappers ///////////////////////////////////////////////// diff --git a/encode.go b/encode.go index ebb4568..2e10134 100644 --- a/encode.go +++ b/encode.go @@ -83,7 +83,7 @@ func (enc *Encoder) Encode(obj any) (err error) { if enc.isRoot { enc.w.par.Reset() } - escaper := enc.w.BackslashEscape + escaper := enc.w.cfg.BackslashEscape if escaper == nil { escaper = EscapeDefault } diff --git a/reencode.go b/reencode.go index f100275..32796f6 100644 --- a/reencode.go +++ b/reencode.go @@ -72,9 +72,9 @@ type ReEncoderConfig struct { // bufio.Writer. func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ - ReEncoderConfig: cfg, - out: fastio.NewAllWriter(out), - specu: new(speculation), + cfg: cfg, + out: fastio.NewAllWriter(out), + specu: new(speculation), } } @@ -87,7 +87,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // // The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { - ReEncoderConfig + cfg ReEncoderConfig out fastio.AllWriter // state: .Write's and .WriteString's utf8-decoding buffer @@ -239,7 +239,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if enc.AllowMultipleValues { + if enc.cfg.AllowMultipleValues { enc.par.Reset() } } @@ -283,7 +283,7 @@ rehandle: return enc.written, enc.err } if t == jsonparse.RuneTypeEOF { - if enc.AllowMultipleValues && len(enc.barriers) == 0 { + if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle } else { @@ -328,7 +328,7 @@ func (enc *ReEncoder) stackSize() int { } func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { - if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { + if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -342,10 +342,10 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er enc.specu.speculating = true enc.specu.endWhenStackSize = stackSize - 1 enc.specu.fmt = ReEncoder{ - ReEncoderConfig: enc.ReEncoderConfig, - out: &enc.specu.compact, + cfg: enc.cfg, + out: &enc.specu.compact, } - enc.specu.fmt.Compact = true + enc.specu.fmt.cfg.Compact = true enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, @@ -369,7 +369,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er return err } switch { - case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent buf := append([]inputTuple(nil), enc.specu.buf...) enc.specu.Reset() if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { @@ -414,7 +414,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) if err := enc.emitByte('\n'); err != nil { return err, false } - case enc.Indent != "" && !enc.Compact: + case enc.cfg.Indent != "" && !enc.cfg.Compact: if err := enc.emitByte('\n'); err != nil { return err, false } @@ -458,11 +458,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // whitespace switch { - case enc.Compact: + case enc.cfg.Compact: if t == jsonparse.RuneTypeSpace { return nil, false } - case enc.Indent != "": + case enc.cfg.Indent != "": switch t { case jsonparse.RuneTypeSpace: // let us manage whitespace, don't pass it through @@ -500,7 +500,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { - escaper := enc.BackslashEscape + escaper := enc.cfg.BackslashEscape if escaper == nil { escaper = EscapeDefault } @@ -549,7 +549,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines && len(enc.barriers) == 0: + case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) err = enc.emitByte('\n') default: @@ -585,13 +585,13 @@ func (enc *ReEncoder) emitNlIndent() error { if err := enc.emitByte('\n'); err != nil { return err } - if enc.Prefix != "" { - if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { + if enc.cfg.Prefix != "" { + if err := enc.emit(enc.out.WriteString(enc.cfg.Prefix)); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.cfg.Indent)); err != nil { return err } } -- cgit v1.1-4-g5e80 From 1e2d058c78969118b099940afdb100a3b93325cc Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 21:40:18 -0700 Subject: reencode: Update the doc comment about all the interfaces implemented --- reencode.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reencode.go b/reencode.go index 32796f6..f23c85a 100644 --- a/reencode.go +++ b/reencode.go @@ -79,8 +79,8 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { } // A ReEncoder takes a stream of JSON elements (by way of implementing -// io.Writer and WriteRune), and re-encodes the JSON, writing it to -// the .Out member. +// io.Writer, io.StringWriter, io.ByteWriter, and WriteRune), and +// re-encodes the JSON, writing it to the .Out member. // // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. -- cgit v1.1-4-g5e80 From 2b7fff828e29b63ae08a871b4b1e74784fab29e5 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 19:06:46 -0700 Subject: Clean up the hex handling --- decode.go | 28 +++++++++------------------- encode_escape.go | 22 ++++++++++++++++++++++ internal/jsonparse/hex.go | 20 -------------------- internal/jsonparse/parse.go | 28 +++++++++++++++------------- internal/jsonstring/encode_string.go | 10 +++++----- reencode.go | 15 +++++---------- 6 files changed, 56 insertions(+), 67 deletions(-) delete mode 100644 internal/jsonparse/hex.go diff --git a/decode.go b/decode.go index 1ff8938..8514ec4 100644 --- a/decode.go +++ b/decode.go @@ -1145,7 +1145,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err := dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp); err != nil { return err } - var uhex [4]byte + var uhex [3]byte for { c, t, err := dec.readRune() if err != nil { @@ -1178,18 +1178,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco panic(fmt.Errorf("should not happen: unexpected rune after backslash: %q", c)) } case jsonparse.RuneTypeStringEscUA: - uhex[0], _ = jsonparse.HexToInt(c) + uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - uhex[1], _ = jsonparse.HexToInt(c) + uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - uhex[2], _ = jsonparse.HexToInt(c) + uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - uhex[3], _ = jsonparse.HexToInt(c) - c = 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c = hexToRune(uhex[0], uhex[1], uhex[2], byte(c)) handleUnicode: if utf16.IsSurrogate(c) { t, err := dec.peekRuneType() @@ -1219,27 +1214,22 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err != nil { return err } - uhex[0], _ = jsonparse.HexToInt(b) + uhex[0] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[1], _ = jsonparse.HexToInt(b) + uhex[1] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[2], _ = jsonparse.HexToInt(b) + uhex[2] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[3], _ = jsonparse.HexToInt(b) - c2 := 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c2 := hexToRune(uhex[0], uhex[1], uhex[2], byte(b)) d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { _, _ = out.WriteRune(utf8.RuneError) diff --git a/encode_escape.go b/encode_escape.go index 0054e72..97da6e9 100644 --- a/encode_escape.go +++ b/encode_escape.go @@ -5,6 +5,7 @@ package lowmemjson import ( + "fmt" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/jsonstring" @@ -27,6 +28,27 @@ const ( BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode ) +func hexToInt(c byte) rune { + switch { + case '0' <= c && c <= '9': + return rune(c) - '0' + case 'a' <= c && c <= 'f': + return rune(c) - 'a' + 10 + case 'A' <= c && c <= 'F': + return rune(c) - 'A' + 10 + default: + panic(fmt.Errorf("should not happen: invalid hex char: %q", c)) + } +} + +func hexToRune(a, b, c, d byte) rune { + return 0 | + hexToInt(a)<<12 | + hexToInt(b)<<8 | + hexToInt(c)<<4 | + hexToInt(d)<<0 +} + // A BackslashEscaper controls how a ReEncoder emits a character in a // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go deleted file mode 100644 index 3ed5f01..0000000 --- a/internal/jsonparse/hex.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package jsonparse - -const Hex = "0123456789abcdef" - -func HexToInt(c rune) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 73584d9..2f5c1ab 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -14,6 +14,12 @@ import ( var ErrParserExceededMaxDepth = errors.New("exceeded max depth") +func isHex(c rune) bool { + return ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F') +} + // RuneType is the classification of a rune when parsing JSON input. // A Parser, rather than grouping runes into tokens and classifying // tokens, classifies runes directly. @@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUA), nil case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUB), nil case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUC), nil case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil // number ////////////////////////////////////////////////////////////////////////////////// // // Here's a flattened drawing of the syntax diagram from www.json.org : diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index f29dc3f..a7670c6 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -10,7 +10,6 @@ import ( "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // BackslashEscapeMode is describe in the main lowmemjson package @@ -27,13 +26,14 @@ const ( type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], } return w.Write(buf[:]) } diff --git a/reencode.go b/reencode.go index f23c85a..4974cb7 100644 --- a/reencode.go +++ b/reencode.go @@ -105,7 +105,7 @@ type ReEncoder struct { lastNonSpaceNonEOF jsonparse.RuneType wasNumber bool curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings + uhex [3]byte // "\uABCD"-encoded characters in strings fracZeros int64 expZero bool specu *speculation @@ -530,18 +530,13 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { } err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) case jsonparse.RuneTypeStringEscUA: - enc.uhex[0], _ = jsonparse.HexToInt(c) + enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - enc.uhex[1], _ = jsonparse.HexToInt(c) + enc.uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - enc.uhex[2], _ = jsonparse.HexToInt(c) + enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - enc.uhex[3], _ = jsonparse.HexToInt(c) - c := 0 | - rune(enc.uhex[0])<<12 | - rune(enc.uhex[1])<<8 | - rune(enc.uhex[2])<<4 | - rune(enc.uhex[3])<<0 + c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() -- cgit v1.1-4-g5e80 From f823342d5b9c2ca376d038471889176ab74acf1b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 15 Feb 2023 15:10:00 -0700 Subject: reencode: Don't bother tracking the number of bytes written --- ReleaseNotes.md | 5 ++ internal/jsonstring/encode_string.go | 25 ++++---- reencode.go | 108 +++++++++++++---------------------- 3 files changed, 61 insertions(+), 77 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index bee16c4..da35130 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -7,6 +7,11 @@ - Change: ReEncoder: The ReEncoderConfig struct member is no longer public. + - Change: ReEncoder: `WriteRune` may now be called even if there is + a partial UTF-8 codepoint from a `Write` or `WriteString` call, + but now simply returns the width of the rune, rather than the + number of bytes actually written. + - Feature: `Number` and `RawMessage` type aliases are now available, so that a user of lowmemjson's native APIs does not need to import `encoding/json` or compat/json in order to use diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index a7670c6..1b0c68a 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -25,7 +25,7 @@ const ( // BackslashEscaper is describe in the main lowmemjson package docs. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { +func writeStringUnicodeEscape(w io.Writer, c rune) error { const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', @@ -35,10 +35,11 @@ func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { alphabet[(c>>4)&0xf], alphabet[(c>>0)&0xf], } - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func writeStringShortEscape(w io.Writer, c rune) (int, error) { +func writeStringShortEscape(w io.Writer, c rune) error { var b byte switch c { case '"', '\\', '/': @@ -57,10 +58,11 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) } buf := [2]byte{'\\', b} - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { switch escape { case BackslashEscapeNone: switch { @@ -74,19 +76,22 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (in case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err default: // obey return writeStringUnicodeEscape(w, c) } @@ -100,7 +105,7 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st return err } for _, c := range str { - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } } @@ -116,7 +121,7 @@ func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []b } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } i += size diff --git a/reencode.go b/reencode.go index 4974cb7..f18888c 100644 --- a/reencode.go +++ b/reencode.go @@ -90,14 +90,13 @@ type ReEncoder struct { cfg ReEncoderConfig out fastio.AllWriter - // state: .Write's and .WriteString's utf8-decoding buffer + // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int - // state: .WriteRune + // state: contract between the public API and .handleRune err error par jsonparse.Parser - written int inputPos int64 // state: .handleRune @@ -166,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRune(p[n:]) { c, size := utf8.DecodeRune(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } @@ -193,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRuneInString(p[n:]) { c, size := utf8.DecodeRuneInString(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } - enc.bufLen = copy(enc.buf[:], p[n:]) return len(p), nil } @@ -213,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error { return fastio.WriteByte(enc, b) } +// WriteRune does what you'd expect. +func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { + return fastio.WriteRune(enc, c) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -232,7 +239,7 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, @@ -246,26 +253,7 @@ func (enc *ReEncoder) Close() error { return nil } -// WriteRune writes a single Unicode code point, returning the number -// of bytes written to the output stream and any error. -// -// Even when there is no error, the number of bytes written may be -// zero (for example, when the rune is whitespace and the ReEncoder is -// minifying the JSON), or it may be substantially longer than one -// code point's worth (for example, when `\uXXXX` escaping a character -// in a string, or when outputing extra whitespace when the ReEncoder -// is prettifying the JSON). -func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { - if enc.err != nil { - return 0, enc.err - } - if enc.bufLen > 0 { - enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen]) - return 0, enc.err - } - - enc.written = 0 - +func (enc *ReEncoder) handleRune(c rune) { rehandle: t, err := enc.par.HandleRune(c) if err != nil { @@ -273,14 +261,14 @@ rehandle: Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } - if err := enc.handleRune(c, t, enc.stackSize()); err != nil { + if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } if t == jsonparse.RuneTypeEOF { if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { @@ -291,12 +279,11 @@ rehandle: Err: fmt.Errorf("invalid character %q after top-level value", c), Offset: enc.inputPos, } - return enc.written, enc.err + return } } enc.inputPos += int64(utf8.RuneLen(c)) - return enc.written, enc.err } // semi-public API ///////////////////////////////////////////////////////////// @@ -327,7 +314,7 @@ func (enc *ReEncoder) stackSize() int { return sz } -func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { +func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -365,7 +352,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er t: t, stackSize: stackSize, }) - if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { return err } switch { @@ -376,7 +363,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er return err } for _, tuple := range buf[1:] { - if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { return err } } @@ -411,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) if enc.lastNonSpace == jsonparse.RuneTypeEOF { switch { case enc.wasNumber && t.IsNumber(): - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } case enc.cfg.Indent != "" && !enc.cfg.Compact: - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } } @@ -433,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) fallthrough default: for enc.fracZeros > 0 { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.fracZeros-- @@ -449,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) enc.expZero = false default: if enc.expZero { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.expZero = false @@ -484,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) return err, false } case jsonparse.RuneTypeObjectColon: - if err := enc.emitByte(' '); err != nil { + if err := enc.out.WriteByte(' '); err != nil { return err, false } } @@ -508,7 +495,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: @@ -528,7 +515,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) case jsonparse.RuneTypeStringEscUA: enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: @@ -537,7 +524,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -546,12 +533,12 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { switch { case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.emitByte('\n') + err = enc.out.WriteByte('\n') default: t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed } default: - err = enc.emitByte(byte(c)) + err = enc.out.WriteByte(byte(c)) } if t != jsonparse.RuneTypeSpace { @@ -563,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { return err } -func (enc *ReEncoder) emitByte(c byte) error { - err := enc.out.WriteByte(c) - if err == nil { - enc.written++ - } - return err -} - -func (enc *ReEncoder) emit(n int, err error) error { - enc.written += n - return err -} - func (enc *ReEncoder) emitNlIndent() error { - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err } if enc.cfg.Prefix != "" { - if err := enc.emit(enc.out.WriteString(enc.cfg.Prefix)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(enc.out.WriteString(enc.cfg.Indent)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { return err } } -- cgit v1.1-4-g5e80