diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-16 22:30:54 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-16 22:30:54 -0700 |
commit | debef01cc500fb9368e1d6d0206a32ca358a8c98 (patch) | |
tree | f021ae7890922e10a1aa119dcdbd7dd2a587f09e | |
parent | d7414035894f378c9e1d48b04a767f61b082186a (diff) | |
parent | f823342d5b9c2ca376d038471889176ab74acf1b (diff) |
Merge branch 'lukeshu/misc'
-rw-r--r-- | ReleaseNotes.md | 29 | ||||
-rw-r--r-- | compat/json/compat.go | 43 | ||||
-rw-r--r-- | compat/json/testcompat_test.go (renamed from compat/json/compat_test.go) | 12 | ||||
-rw-r--r-- | decode.go | 28 | ||||
-rw-r--r-- | encode.go | 19 | ||||
-rw-r--r-- | encode_escape.go | 32 | ||||
-rw-r--r-- | internal/jsonparse/hex.go | 20 | ||||
-rw-r--r-- | internal/jsonparse/parse.go | 28 | ||||
-rw-r--r-- | internal/jsonstring/encode_string.go (renamed from encode_string.go) | 59 | ||||
-rw-r--r-- | reencode.go | 181 |
10 files changed, 226 insertions, 225 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md index e00bf10..da35130 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -4,16 +4,25 @@ User-facing changes: - - `Number` and `RawMessage` type aliases are now available, so that - a user of lowmemjson's native APIs does not need to import - `encoding/json` or compat/json in order to use them. - - - Encoder, ReEncoder: If there was an error writing to the output - stream, it may have returned a `*ReEncodeSyntaxError` even though - it's not a syntax issue, or may have returned the underlying - error without wrapping it. If there is an error writing to the - output, Encoder and ReEncoder now return `*EncodeWriteError` and - `*ReEncodeWriteError` respectively. + - Change: ReEncoder: The ReEncoderConfig struct member is no longer + public. + + - Change: ReEncoder: `WriteRune` may now be called even if there is + a partial UTF-8 codepoint from a `Write` or `WriteString` call, + but now simply returns the width of the rune, rather than the + number of bytes actually written. + + - Feature: `Number` and `RawMessage` type aliases are now + available, so that a user of lowmemjson's native APIs does not + need to import `encoding/json` or compat/json in order to use + them. + + - Bugfix: Encoder, ReEncoder: If there was an error writing to the + output stream, it may have returned a `*ReEncodeSyntaxError` even + though it's not a syntax issue, or may have returned the + underlying error without wrapping it. If there is an error + writing to the output, Encoder and ReEncoder now return + `*EncodeWriteError` and `*ReEncodeWriteError` respectively. 
# v0.3.5 (2023-02-10) diff --git a/compat/json/compat.go b/compat/json/compat.go index 3678135..c96470d 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -72,8 +72,11 @@ func Marshal(v any) ([]byte, error) { } type Encoder struct { - out io.Writer - buf bytes.Buffer + out io.Writer + buf bytes.Buffer + + cfg lowmemjson.ReEncoderConfig + encoder *lowmemjson.Encoder formatter *lowmemjson.ReEncoder } @@ -81,17 +84,23 @@ type Encoder struct { func NewEncoder(w io.Writer) *Encoder { ret := &Encoder{ out: w, - } - ret.formatter = lowmemjson.NewReEncoder(&ret.buf, lowmemjson.ReEncoderConfig{ - AllowMultipleValues: true, - Compact: true, - ForceTrailingNewlines: true, - }) - ret.encoder = lowmemjson.NewEncoder(ret.formatter) + cfg: lowmemjson.ReEncoderConfig{ + AllowMultipleValues: true, + + Compact: true, + ForceTrailingNewlines: true, + }, + } + ret.refreshConfig() return ret } +func (enc *Encoder) refreshConfig() { + enc.formatter = lowmemjson.NewReEncoder(&enc.buf, enc.cfg) + enc.encoder = lowmemjson.NewEncoder(enc.formatter) +} + func (enc *Encoder) Encode(v any) error { if err := convertEncodeError(enc.encoder.Encode(v)); err != nil { enc.buf.Reset() @@ -104,17 +113,19 @@ func (enc *Encoder) Encode(v any) error { } func (enc *Encoder) SetEscapeHTML(on bool) { - var escaper lowmemjson.BackslashEscaper - if !on { - escaper = lowmemjson.EscapeDefaultNonHTMLSafe + if on { + enc.cfg.BackslashEscape = lowmemjson.EscapeDefault + } else { + enc.cfg.BackslashEscape = lowmemjson.EscapeDefaultNonHTMLSafe } - enc.formatter.BackslashEscape = escaper + enc.refreshConfig() } func (enc *Encoder) SetIndent(prefix, indent string) { - enc.formatter.Compact = prefix == "" && indent == "" - enc.formatter.Prefix = prefix - enc.formatter.Indent = indent + enc.cfg.Compact = prefix == "" && indent == "" + enc.cfg.Prefix = prefix + enc.cfg.Indent = indent + enc.refreshConfig() } // ReEncode wrappers ///////////////////////////////////////////////// diff --git 
a/compat/json/compat_test.go b/compat/json/testcompat_test.go index 07c75bc..42cbf5c 100644 --- a/compat/json/compat_test.go +++ b/compat/json/testcompat_test.go @@ -11,8 +11,8 @@ import ( _ "unsafe" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -58,20 +58,14 @@ type encodeState struct { bytes.Buffer } -//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString -func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error - -//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes -func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error - func (es *encodeState) string(str string, _ bool) { - if err := encodeStringFromString(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } func (es *encodeState) stringBytes(str []byte, _ bool) { - if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil { + if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil { panic(err) } } @@ -1145,7 +1145,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err := dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp); err != nil { return err } - var uhex [4]byte + var uhex [3]byte for { c, t, err := dec.readRune() if err != nil { @@ -1178,18 +1178,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco panic(fmt.Errorf("should not happen: unexpected rune after backslash: %q", c)) } case jsonparse.RuneTypeStringEscUA: - uhex[0], _ = jsonparse.HexToInt(c) + uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - uhex[1], _ = 
jsonparse.HexToInt(c) + uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - uhex[2], _ = jsonparse.HexToInt(c) + uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - uhex[3], _ = jsonparse.HexToInt(c) - c = 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c = hexToRune(uhex[0], uhex[1], uhex[2], byte(c)) handleUnicode: if utf16.IsSurrogate(c) { t, err := dec.peekRuneType() @@ -1219,27 +1214,22 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err != nil { return err } - uhex[0], _ = jsonparse.HexToInt(b) + uhex[0] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[1], _ = jsonparse.HexToInt(b) + uhex[1] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[2], _ = jsonparse.HexToInt(b) + uhex[2] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[3], _ = jsonparse.HexToInt(b) - c2 := 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c2 := hexToRune(uhex[0], uhex[1], uhex[2], byte(b)) d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { _, _ = out.WriteRune(utf8.RuneError) @@ -16,6 +16,7 @@ import ( "strings" "unsafe" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" "git.lukeshu.com/go/lowmemjson/internal/jsonstruct" ) @@ -82,7 +83,11 @@ func (enc *Encoder) Encode(obj any) (err error) { if enc.isRoot { enc.w.par.Reset() } - if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil { + escaper := enc.w.cfg.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } + if err := encode(enc.w, reflect.ValueOf(obj), escaper, false, 0, map[any]struct{}{}); err != nil { if rwe, ok := err.(*ReEncodeWriteError); ok { err = &EncodeWriteError{ Err: rwe.Err, @@ -192,7 +197,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo Err: err, } } - if err := encodeStringFromBytes(w, escaper, text); err != 
nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, text); err != nil { return err } default: @@ -295,14 +300,14 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } else { if quote { var buf bytes.Buffer - if err := encodeStringFromString(&buf, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(&buf, escaper, val.String()); err != nil { return err } - if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { + if err := jsonstring.EncodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { return err } } else { - if err := encodeStringFromString(w, escaper, val.String()); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, val.String()); err != nil { return err } } @@ -336,7 +341,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } } empty = false - if err := encodeStringFromString(w, escaper, field.Name); err != nil { + if err := jsonstring.EncodeStringFromString(w, escaper, field.Name); err != nil { return err } if err := w.WriteByte(':'); err != nil { @@ -389,7 +394,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo } if !strings.HasPrefix(kStr, `"`) { k.Reset() - if err := encodeStringFromString(&k, escaper, kStr); err != nil { + if err := jsonstring.EncodeStringFromString(&k, escaper, kStr); err != nil { return err } kStr = k.String() diff --git a/encode_escape.go b/encode_escape.go index ab0d9c1..97da6e9 100644 --- a/encode_escape.go +++ b/encode_escape.go @@ -5,7 +5,10 @@ package lowmemjson import ( + "fmt" "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // BackslashEscapeMode identifies one of the three ways that a @@ -17,14 +20,35 @@ import ( // single-character) // // - as a long Unicode `\uXXXX` backslash sequence -type BackslashEscapeMode uint8 +type BackslashEscapeMode = jsonstring.BackslashEscapeMode const ( - BackslashEscapeNone BackslashEscapeMode 
= iota - BackslashEscapeShort - BackslashEscapeUnicode + BackslashEscapeNone = jsonstring.BackslashEscapeNone + BackslashEscapeShort = jsonstring.BackslashEscapeShort + BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode ) +func hexToInt(c byte) rune { + switch { + case '0' <= c && c <= '9': + return rune(c) - '0' + case 'a' <= c && c <= 'f': + return rune(c) - 'a' + 10 + case 'A' <= c && c <= 'F': + return rune(c) - 'A' + 10 + default: + panic(fmt.Errorf("should not happen: invalid hex char: %q", c)) + } +} + +func hexToRune(a, b, c, d byte) rune { + return 0 | + hexToInt(a)<<12 | + hexToInt(b)<<8 | + hexToInt(c)<<4 | + hexToInt(d)<<0 +} + // A BackslashEscaper controls how a ReEncoder emits a character in a // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go deleted file mode 100644 index 3ed5f01..0000000 --- a/internal/jsonparse/hex.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package jsonparse - -const Hex = "0123456789abcdef" - -func HexToInt(c rune) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 73584d9..2f5c1ab 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -14,6 +14,12 @@ import ( var ErrParserExceededMaxDepth = errors.New("exceeded max depth") +func isHex(c rune) bool { + return ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F') +} + // RuneType is the classification of a rune when parsing JSON input. 
// A Parser, rather than grouping runes into tokens and classifying // tokens, classifies runes directly. @@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUA), nil case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUB), nil case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUC), nil case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil // number ////////////////////////////////////////////////////////////////////////////////// // // Here's a flattened drawing of the syntax diagram from www.json.org : diff --git a/encode_string.go b/internal/jsonstring/encode_string.go index 208aef4..1b0c68a 100644 --- a/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: GPL-2.0-or-later -package lowmemjson +package jsonstring import ( "fmt" @@ -10,22 +10,36 @@ import ( "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" - 
"git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { +// BackslashEscapeMode is described in the main lowmemjson package +// docs. +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// BackslashEscaper is described in the main lowmemjson package docs. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func writeStringUnicodeEscape(w io.Writer, c rune) error { + const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], } - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func writeStringShortEscape(w io.Writer, c rune) (int, error) { +func writeStringShortEscape(w io.Writer, c rune) error { var b byte switch c { case '"', '\\', '/': @@ -44,14 +58,12 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) } buf := [2]byte{'\\', b} - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { - if escaper == nil { - escaper = EscapeDefault - } - switch escaper(c, wasEscaped) { +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { + switch escape { case BackslashEscapeNone: switch { case c < 0x0020: // override, gotta escape these @@ -64,19 +76,22 @@ func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey - return w.WriteRune(c) + _, err := 
w.WriteRune(c) + return err } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err default: // obey return writeStringUnicodeEscape(w, c) } @@ -85,12 +100,12 @@ func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, } } -func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { +func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { if err := w.WriteByte('"'); err != nil { return err } for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } } @@ -100,13 +115,13 @@ func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st return nil } -func encodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { +func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { if err := w.WriteByte('"'); err != nil { return err } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } i += size diff --git a/reencode.go b/reencode.go index d19dc1a..f18888c 100644 --- a/reencode.go +++ b/reencode.go @@ -12,6 +12,7 @@ import ( "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls 
how a ReEncoder should behave. @@ -71,32 +72,31 @@ type ReEncoderConfig struct { // bufio.Writer. func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ - ReEncoderConfig: cfg, - out: fastio.NewAllWriter(out), - specu: new(speculation), + cfg: cfg, + out: fastio.NewAllWriter(out), + specu: new(speculation), } } // A ReEncoder takes a stream of JSON elements (by way of implementing -// io.Writer and WriteRune), and re-encodes the JSON, writing it to -// the .Out member. +// io.Writer, io.StringWriter, io.ByteWriter, and WriteRune), and +// re-encodes the JSON, writing it to the .Out member. // // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. // // The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { - ReEncoderConfig + cfg ReEncoderConfig out fastio.AllWriter - // state: .Write's and .WriteString's utf8-decoding buffer + // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int - // state: .WriteRune + // state: contract between the public API and .handleRune err error par jsonparse.Parser - written int inputPos int64 // state: .handleRune @@ -104,7 +104,7 @@ type ReEncoder struct { lastNonSpaceNonEOF jsonparse.RuneType wasNumber bool curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings + uhex [3]byte // "\uABCD"-encoded characters in strings fracZeros int64 expZero bool specu *speculation @@ -165,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRune(p[n:]) { c, size := utf8.DecodeRune(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } @@ -192,18 +194,19 @@ func (enc *ReEncoder) 
WriteString(p string) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRuneInString(p[n:]) { c, size := utf8.DecodeRuneInString(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } - enc.bufLen = copy(enc.buf[:], p[n:]) return len(p), nil } @@ -212,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error { return fastio.WriteByte(enc, b) } +// WriteRune does what you'd expect. +func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { + return fastio.WriteRune(enc, c) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -231,40 +239,21 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } return enc.err } - if enc.AllowMultipleValues { + if enc.cfg.AllowMultipleValues { enc.par.Reset() } } return nil } -// WriteRune writes a single Unicode code point, returning the number -// of bytes written to the output stream and any error. -// -// Even when there is no error, the number of bytes written may be -// zero (for example, when the rune is whitespace and the ReEncoder is -// minifying the JSON), or it may be substantially longer than one -// code point's worth (for example, when `\uXXXX` escaping a character -// in a string, or when outputing extra whitespace when the ReEncoder -// is prettifying the JSON). 
-func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { - if enc.err != nil { - return 0, enc.err - } - if enc.bufLen > 0 { - enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen]) - return 0, enc.err - } - - enc.written = 0 - +func (enc *ReEncoder) handleRune(c rune) { rehandle: t, err := enc.par.HandleRune(c) if err != nil { @@ -272,17 +261,17 @@ rehandle: Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } - if err := enc.handleRune(c, t, enc.stackSize()); err != nil { + if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } if t == jsonparse.RuneTypeEOF { - if enc.AllowMultipleValues && len(enc.barriers) == 0 { + if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle } else { @@ -290,12 +279,11 @@ rehandle: Err: fmt.Errorf("invalid character %q after top-level value", c), Offset: enc.inputPos, } - return enc.written, enc.err + return } } enc.inputPos += int64(utf8.RuneLen(c)) - return enc.written, enc.err } // semi-public API ///////////////////////////////////////////////////////////// @@ -326,8 +314,8 @@ func (enc *ReEncoder) stackSize() int { return sz } -func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { - if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { +func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { + if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -341,10 +329,10 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er enc.specu.speculating = true enc.specu.endWhenStackSize = stackSize - 1 enc.specu.fmt = ReEncoder{ - ReEncoderConfig: enc.ReEncoderConfig, - out: &enc.specu.compact, + cfg: enc.cfg, 
+ out: &enc.specu.compact, } - enc.specu.fmt.Compact = true + enc.specu.fmt.cfg.Compact = true enc.specu.buf = append(enc.specu.buf, inputTuple{ c: c, t: t, @@ -364,18 +352,18 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er t: t, stackSize: stackSize, }) - if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { return err } switch { - case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent buf := append([]inputTuple(nil), enc.specu.buf...) enc.specu.Reset() if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } for _, tuple := range buf[1:] { - if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { return err } } @@ -410,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) if enc.lastNonSpace == jsonparse.RuneTypeEOF { switch { case enc.wasNumber && t.IsNumber(): - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } - case enc.Indent != "" && !enc.Compact: - if err := enc.emitByte('\n'); err != nil { + case enc.cfg.Indent != "" && !enc.cfg.Compact: + if err := enc.out.WriteByte('\n'); err != nil { return err, false } } @@ -432,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) fallthrough default: for enc.fracZeros > 0 { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.fracZeros-- @@ -448,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) enc.expZero = false default: if enc.expZero { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != 
nil { return err, false } enc.expZero = false @@ -457,11 +445,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // whitespace switch { - case enc.Compact: + case enc.cfg.Compact: if t == jsonparse.RuneTypeSpace { return nil, false } - case enc.Indent != "": + case enc.cfg.Indent != "": switch t { case jsonparse.RuneTypeSpace: // let us manage whitespace, don't pass it through @@ -483,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) return err, false } case jsonparse.RuneTypeObjectColon: - if err := enc.emitByte(' '); err != nil { + if err := enc.out.WriteByte(' '); err != nil { return err, false } } @@ -499,62 +487,58 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) // handleRuneMain handles the new rune itself, not buffered things. func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { + escaper := enc.cfg.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } var err error switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: switch c { - case '"': - err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) - case '\\': - err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) - case '/': - err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + case '"', '\\', '/': + // self case 'b': - err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + c = '\b' case 'f': - err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + c = '\f' case 'n': - err = enc.emit(writeStringChar(enc.out, '\n', 
BackslashEscapeShort, enc.BackslashEscape)) + c = '\n' case 'r': - err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + c = '\r' case 't': - err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) + c = '\t' default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) case jsonparse.RuneTypeStringEscUA: - enc.uhex[0], _ = jsonparse.HexToInt(c) + enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - enc.uhex[1], _ = jsonparse.HexToInt(c) + enc.uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - enc.uhex[2], _ = jsonparse.HexToInt(c) + enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - enc.uhex[3], _ = jsonparse.HexToInt(c) - c := 0 | - rune(enc.uhex[0])<<12 | - rune(enc.uhex[1])<<8 | - rune(enc.uhex[2])<<4 | - rune(enc.uhex[3])<<0 - err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines && len(enc.barriers) == 0: + case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.emitByte('\n') + err = enc.out.WriteByte('\n') default: t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed } default: - err = enc.emitByte(byte(c)) + err = enc.out.WriteByte(byte(c)) } if t != jsonparse.RuneTypeSpace { @@ -566,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { return err } -func (enc *ReEncoder) 
emitByte(c byte) error { - err := enc.out.WriteByte(c) - if err == nil { - enc.written++ - } - return err -} - -func (enc *ReEncoder) emit(n int, err error) error { - enc.written += n - return err -} - func (enc *ReEncoder) emitNlIndent() error { - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err } - if enc.Prefix != "" { - if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { + if enc.cfg.Prefix != "" { + if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { return err } } |