diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-15 15:10:00 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-16 22:30:29 -0700 |
commit | f823342d5b9c2ca376d038471889176ab74acf1b (patch) | |
tree | f021ae7890922e10a1aa119dcdbd7dd2a587f09e | |
parent | 2b7fff828e29b63ae08a871b4b1e74784fab29e5 (diff) |
reencode: Don't bother tracking the number of bytes written
-rw-r--r-- | ReleaseNotes.md | 5 | ||||
-rw-r--r-- | internal/jsonstring/encode_string.go | 25 | ||||
-rw-r--r-- | reencode.go | 108 |
3 files changed, 61 insertions, 77 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md index bee16c4..da35130 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -7,6 +7,11 @@ - Change: ReEncoder: The ReEncoderConfig struct member is no longer public. + - Change: ReEncoder: `WriteRune` may now be called even if there is + a partial UTF-8 codepoint from a `Write` or `WriteString` call, + but now simply returns the width of the rune, rather than the + number of bytes actually written. + - Feature: `Number` and `RawMessage` type aliases are now available, so that a user of lowmemjson's native APIs does not need to import `encoding/json` or compat/json in order to use diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index a7670c6..1b0c68a 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -25,7 +25,7 @@ const ( // BackslashEscaper is describe in the main lowmemjson package docs. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { +func writeStringUnicodeEscape(w io.Writer, c rune) error { const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', @@ -35,10 +35,11 @@ func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { alphabet[(c>>4)&0xf], alphabet[(c>>0)&0xf], } - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func writeStringShortEscape(w io.Writer, c rune) (int, error) { +func writeStringShortEscape(w io.Writer, c rune) error { var b byte switch c { case '"', '\\', '/': @@ -57,10 +58,11 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c)) } buf := [2]byte{'\\', b} - return w.Write(buf[:]) + _, err := w.Write(buf[:]) + return err } -func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) { +func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error { switch escape { case BackslashEscapeNone: switch { @@ -74,19 +76,22 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (in case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) default: // obey - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return w.WriteRune(c) + _, err := w.WriteRune(c) + return err default: // obey return writeStringUnicodeEscape(w, c) } @@ -100,7 +105,7 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st return err } for _, c := range str { - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } } @@ -116,7 +121,7 @@ func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []b } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) - if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { + if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil { return err } i += size diff --git a/reencode.go b/reencode.go index 4974cb7..f18888c 100644 --- a/reencode.go +++ b/reencode.go @@ -90,14 +90,13 @@ type ReEncoder struct { cfg ReEncoderConfig out fastio.AllWriter - // state: .Write's and .WriteString's utf8-decoding buffer + // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int - // state: .WriteRune + // state: contract between the public API and .handleRune err error par jsonparse.Parser - written int inputPos int64 // state: .handleRune @@ -166,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRune(p[n:]) { c, size := utf8.DecodeRune(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } @@ -193,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) { c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err + enc.handleRune(c) + if enc.err != nil { + return 0, enc.err } } for utf8.FullRuneInString(p[n:]) { c, size := utf8.DecodeRuneInString(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err + enc.handleRune(c) + if enc.err != nil { + return n, enc.err } n += size } - enc.bufLen = copy(enc.buf[:], p[n:]) return len(p), nil } @@ -213,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error { return fastio.WriteByte(enc, b) } +// WriteRune does what you'd expect. +func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { + return fastio.WriteRune(enc, c) +} + // Close implements io.Closer; it does what you'd expect, mostly. // // The *ReEncoder may continue to be written to with new JSON values @@ -232,7 +239,7 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, @@ -246,26 +253,7 @@ func (enc *ReEncoder) Close() error { return nil } -// WriteRune writes a single Unicode code point, returning the number -// of bytes written to the output stream and any error. -// -// Even when there is no error, the number of bytes written may be -// zero (for example, when the rune is whitespace and the ReEncoder is -// minifying the JSON), or it may be substantially longer than one -// code point's worth (for example, when `\uXXXX` escaping a character -// in a string, or when outputing extra whitespace when the ReEncoder -// is prettifying the JSON). -func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { - if enc.err != nil { - return 0, enc.err - } - if enc.bufLen > 0 { - enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen]) - return 0, enc.err - } - - enc.written = 0 - +func (enc *ReEncoder) handleRune(c rune) { rehandle: t, err := enc.par.HandleRune(c) if err != nil { @@ -273,14 +261,14 @@ rehandle: Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } - if err := enc.handleRune(c, t, enc.stackSize()); err != nil { + if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } - return enc.written, enc.err + return } if t == jsonparse.RuneTypeEOF { if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { @@ -291,12 +279,11 @@ rehandle: Err: fmt.Errorf("invalid character %q after top-level value", c), Offset: enc.inputPos, } - return enc.written, enc.err + return } } enc.inputPos += int64(utf8.RuneLen(c)) - return enc.written, enc.err } // semi-public API ///////////////////////////////////////////////////////////// @@ -327,7 +314,7 @@ func (enc *ReEncoder) stackSize() int { return sz } -func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { +func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -365,7 +352,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er t: t, stackSize: stackSize, }) - if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { + if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { return err } switch { @@ -376,7 +363,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er return err } for _, tuple := range buf[1:] { - if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { return err } } @@ -411,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) if enc.lastNonSpace == jsonparse.RuneTypeEOF { switch { case enc.wasNumber && t.IsNumber(): - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } case enc.cfg.Indent != "" && !enc.cfg.Compact: - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err, false } } @@ -433,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) fallthrough default: for enc.fracZeros > 0 { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.fracZeros-- @@ -449,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) enc.expZero = false default: if enc.expZero { - if err := enc.emitByte('0'); err != nil { + if err := enc.out.WriteByte('0'); err != nil { return err, false } enc.expZero = false @@ -484,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) return err, false } case jsonparse.RuneTypeObjectColon: - if err := enc.emitByte(' '); err != nil { + if err := enc.out.WriteByte(' '); err != nil { return err, false } } @@ -508,7 +495,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { switch t { case jsonparse.RuneTypeStringChar: - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing case jsonparse.RuneTypeStringEsc1: @@ -528,7 +515,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) case jsonparse.RuneTypeStringEscUA: enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: @@ -537,7 +524,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) - err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) + err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough @@ -546,12 +533,12 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { switch { case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.emitByte('\n') + err = enc.out.WriteByte('\n') default: t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed } default: - err = enc.emitByte(byte(c)) + err = enc.out.WriteByte(byte(c)) } if t != jsonparse.RuneTypeSpace { @@ -563,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { return err } -func (enc *ReEncoder) emitByte(c byte) error { - err := enc.out.WriteByte(c) - if err == nil { - enc.written++ - } - return err -} - -func (enc *ReEncoder) emit(n int, err error) error { - enc.written += n - return err -} - func (enc *ReEncoder) emitNlIndent() error { - if err := enc.emitByte('\n'); err != nil { + if err := enc.out.WriteByte('\n'); err != nil { return err } if enc.cfg.Prefix != "" { - if err := enc.emit(enc.out.WriteString(enc.cfg.Prefix)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(enc.out.WriteString(enc.cfg.Indent)); err != nil { + if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { return err } } |