summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Shumaker <lukeshu@lukeshu.com> 2023-02-15 15:10:00 -0700
committer: Luke Shumaker <lukeshu@lukeshu.com> 2023-02-16 22:30:29 -0700
commit: f823342d5b9c2ca376d038471889176ab74acf1b (patch)
tree: f021ae7890922e10a1aa119dcdbd7dd2a587f09e
parent: 2b7fff828e29b63ae08a871b4b1e74784fab29e5 (diff)
reencode: Don't bother tracking the number of bytes written
-rw-r--r-- ReleaseNotes.md 5
-rw-r--r-- internal/jsonstring/encode_string.go 25
-rw-r--r-- reencode.go 108
3 files changed, 61 insertions, 77 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index bee16c4..da35130 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -7,6 +7,11 @@
- Change: ReEncoder: The ReEncoderConfig struct member is no longer
public.
+ - Change: ReEncoder: `WriteRune` may now be called even if there is
+ a partial UTF-8 codepoint from a `Write` or `WriteString` call,
+ but now simply returns the width of the rune, rather than the
+ number of bytes actually written.
+
- Feature: `Number` and `RawMessage` type aliases are now
available, so that a user of lowmemjson's native APIs does not
need to import `encoding/json` or compat/json in order to use
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
index a7670c6..1b0c68a 100644
--- a/internal/jsonstring/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -25,7 +25,7 @@ const (
// BackslashEscaper is describe in the main lowmemjson package docs.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
-func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
+func writeStringUnicodeEscape(w io.Writer, c rune) error {
const alphabet = "0123456789abcdef"
buf := [6]byte{
'\\',
@@ -35,10 +35,11 @@ func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
alphabet[(c>>4)&0xf],
alphabet[(c>>0)&0xf],
}
- return w.Write(buf[:])
+ _, err := w.Write(buf[:])
+ return err
}
-func writeStringShortEscape(w io.Writer, c rune) (int, error) {
+func writeStringShortEscape(w io.Writer, c rune) error {
var b byte
switch c {
case '"', '\\', '/':
@@ -57,10 +58,11 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) {
panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c))
}
buf := [2]byte{'\\', b}
- return w.Write(buf[:])
+ _, err := w.Write(buf[:])
+ return err
}
-func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) {
+func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error {
switch escape {
case BackslashEscapeNone:
switch {
@@ -74,19 +76,22 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (in
case c == '"' || c == '\\': // override, gotta escape these
return writeStringShortEscape(w, c)
default: // obey
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
}
case BackslashEscapeShort:
switch c {
case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
return writeStringShortEscape(w, c)
default: // override, can't short-escape these
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
}
case BackslashEscapeUnicode:
switch {
case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
default: // obey
return writeStringUnicodeEscape(w, c)
}
@@ -100,7 +105,7 @@ func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st
return err
}
for _, c := range str {
- if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
return err
}
}
@@ -116,7 +121,7 @@ func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []b
}
for i := 0; i < len(str); {
c, size := utf8.DecodeRune(str[i:])
- if _, err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
return err
}
i += size
diff --git a/reencode.go b/reencode.go
index 4974cb7..f18888c 100644
--- a/reencode.go
+++ b/reencode.go
@@ -90,14 +90,13 @@ type ReEncoder struct {
cfg ReEncoderConfig
out fastio.AllWriter
- // state: .Write's and .WriteString's utf8-decoding buffer
+ // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer
buf [utf8.UTFMax]byte
bufLen int
- // state: .WriteRune
+ // state: contract between the public API and .handleRune
err error
par jsonparse.Parser
- written int
inputPos int64
// state: .handleRune
@@ -166,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRune(p[n:]) {
c, size := utf8.DecodeRune(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
@@ -193,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRuneInString(p[n:]) {
c, size := utf8.DecodeRuneInString(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
- enc.bufLen = copy(enc.buf[:], p[n:])
return len(p), nil
}
@@ -213,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error {
return fastio.WriteByte(enc, b)
}
+// WriteRune does what you'd expect.
+func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
+ return fastio.WriteRune(enc, c)
+}
+
// Close implements io.Closer; it does what you'd expect, mostly.
//
// The *ReEncoder may continue to be written to with new JSON values
@@ -232,7 +239,7 @@ func (enc *ReEncoder) Close() error {
return enc.err
}
if len(enc.barriers) == 0 {
- if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
@@ -246,26 +253,7 @@ func (enc *ReEncoder) Close() error {
return nil
}
-// WriteRune writes a single Unicode code point, returning the number
-// of bytes written to the output stream and any error.
-//
-// Even when there is no error, the number of bytes written may be
-// zero (for example, when the rune is whitespace and the ReEncoder is
-// minifying the JSON), or it may be substantially longer than one
-// code point's worth (for example, when `\uXXXX` escaping a character
-// in a string, or when outputing extra whitespace when the ReEncoder
-// is prettifying the JSON).
-func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
- if enc.err != nil {
- return 0, enc.err
- }
- if enc.bufLen > 0 {
- enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen])
- return 0, enc.err
- }
-
- enc.written = 0
-
+func (enc *ReEncoder) handleRune(c rune) {
rehandle:
t, err := enc.par.HandleRune(c)
if err != nil {
@@ -273,14 +261,14 @@ rehandle:
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
- if err := enc.handleRune(c, t, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
if t == jsonparse.RuneTypeEOF {
if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 {
@@ -291,12 +279,11 @@ rehandle:
Err: fmt.Errorf("invalid character %q after top-level value", c),
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
}
enc.inputPos += int64(utf8.RuneLen(c))
- return enc.written, enc.err
}
// semi-public API /////////////////////////////////////////////////////////////
@@ -327,7 +314,7 @@ func (enc *ReEncoder) stackSize() int {
return sz
}
-func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error {
+func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error {
if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" {
return enc.handleRuneNoSpeculation(c, t)
}
@@ -365,7 +352,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
t: t,
stackSize: stackSize,
})
- if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil {
+ if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil {
return err
}
switch {
@@ -376,7 +363,7 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
return err
}
for _, tuple := range buf[1:] {
- if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil {
+ if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil {
return err
}
}
@@ -411,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
if enc.lastNonSpace == jsonparse.RuneTypeEOF {
switch {
case enc.wasNumber && t.IsNumber():
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
case enc.cfg.Indent != "" && !enc.cfg.Compact:
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
}
@@ -433,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
fallthrough
default:
for enc.fracZeros > 0 {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.fracZeros--
@@ -449,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
enc.expZero = false
default:
if enc.expZero {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.expZero = false
@@ -484,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
return err, false
}
case jsonparse.RuneTypeObjectColon:
- if err := enc.emitByte(' '); err != nil {
+ if err := enc.out.WriteByte(' '); err != nil {
return err, false
}
}
@@ -508,7 +495,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
switch t {
case jsonparse.RuneTypeStringChar:
- err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))
case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU:
// do nothing
case jsonparse.RuneTypeStringEsc1:
@@ -528,7 +515,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
default:
panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c))
}
- err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))
case jsonparse.RuneTypeStringEscUA:
enc.uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
@@ -537,7 +524,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
enc.uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
- err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))
case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()
fallthrough
@@ -546,12 +533,12 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
switch {
case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0:
t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
- err = enc.emitByte('\n')
+ err = enc.out.WriteByte('\n')
default:
t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
}
default:
- err = enc.emitByte(byte(c))
+ err = enc.out.WriteByte(byte(c))
}
if t != jsonparse.RuneTypeSpace {
@@ -563,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
return err
}
-func (enc *ReEncoder) emitByte(c byte) error {
- err := enc.out.WriteByte(c)
- if err == nil {
- enc.written++
- }
- return err
-}
-
-func (enc *ReEncoder) emit(n int, err error) error {
- enc.written += n
- return err
-}
-
func (enc *ReEncoder) emitNlIndent() error {
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err
}
if enc.cfg.Prefix != "" {
- if err := enc.emit(enc.out.WriteString(enc.cfg.Prefix)); err != nil {
+ if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil {
return err
}
}
for i := 0; i < enc.curIndent; i++ {
- if err := enc.emit(enc.out.WriteString(enc.cfg.Indent)); err != nil {
+ if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil {
return err
}
}