summary | refs | log | tree | commit | diff
path: root/reencode.go
diff options
context:
space:
mode:
Diffstat (limited to 'reencode.go')
-rw-r--r-- reencode.go 181
1 files changed, 76 insertions, 105 deletions
diff --git a/reencode.go b/reencode.go
index d19dc1a..f18888c 100644
--- a/reencode.go
+++ b/reencode.go
@@ -12,6 +12,7 @@ import (
"git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// A ReEncoderConfig controls how a ReEncoder should behave.
@@ -71,32 +72,31 @@ type ReEncoderConfig struct {
// bufio.Writer.
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
return &ReEncoder{
- ReEncoderConfig: cfg,
- out: fastio.NewAllWriter(out),
- specu: new(speculation),
+ cfg: cfg,
+ out: fastio.NewAllWriter(out),
+ specu: new(speculation),
}
}
// A ReEncoder takes a stream of JSON elements (by way of implementing
-// io.Writer and WriteRune), and re-encodes the JSON, writing it to
-// the .Out member.
+// io.Writer, io.StringWriter, io.ByteWriter, and WriteRune), and
+// re-encodes the JSON, writing it to the .Out member.
//
// This is useful for prettifying, minifying, sanitizing, and/or
// validating JSON.
//
// The memory use of a ReEncoder is O(CompactIfUnder+depth).
type ReEncoder struct {
- ReEncoderConfig
+ cfg ReEncoderConfig
out fastio.AllWriter
- // state: .Write's and .WriteString's utf8-decoding buffer
+ // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer
buf [utf8.UTFMax]byte
bufLen int
- // state: .WriteRune
+ // state: contract between the public API and .handleRune
err error
par jsonparse.Parser
- written int
inputPos int64
// state: .handleRune
@@ -104,7 +104,7 @@ type ReEncoder struct {
lastNonSpaceNonEOF jsonparse.RuneType
wasNumber bool
curIndent int
- uhex [4]byte // "\uABCD"-encoded characters in strings
+ uhex [3]byte // "\uABCD"-encoded characters in strings
fracZeros int64
expZero bool
specu *speculation
@@ -165,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRune(p[n:]) {
c, size := utf8.DecodeRune(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
@@ -192,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRuneInString(p[n:]) {
c, size := utf8.DecodeRuneInString(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
- enc.bufLen = copy(enc.buf[:], p[n:])
return len(p), nil
}
@@ -212,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error {
return fastio.WriteByte(enc, b)
}
+// WriteRune does what you'd expect.
+func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
+ return fastio.WriteRune(enc, c)
+}
+
// Close implements io.Closer; it does what you'd expect, mostly.
//
// The *ReEncoder may continue to be written to with new JSON values
@@ -231,40 +239,21 @@ func (enc *ReEncoder) Close() error {
return enc.err
}
if len(enc.barriers) == 0 {
- if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
return enc.err
}
- if enc.AllowMultipleValues {
+ if enc.cfg.AllowMultipleValues {
enc.par.Reset()
}
}
return nil
}
-// WriteRune writes a single Unicode code point, returning the number
-// of bytes written to the output stream and any error.
-//
-// Even when there is no error, the number of bytes written may be
-// zero (for example, when the rune is whitespace and the ReEncoder is
-// minifying the JSON), or it may be substantially longer than one
-// code point's worth (for example, when `\uXXXX` escaping a character
-// in a string, or when outputing extra whitespace when the ReEncoder
-// is prettifying the JSON).
-func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
- if enc.err != nil {
- return 0, enc.err
- }
- if enc.bufLen > 0 {
- enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen])
- return 0, enc.err
- }
-
- enc.written = 0
-
+func (enc *ReEncoder) handleRune(c rune) {
rehandle:
t, err := enc.par.HandleRune(c)
if err != nil {
@@ -272,17 +261,17 @@ rehandle:
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
- if err := enc.handleRune(c, t, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
if t == jsonparse.RuneTypeEOF {
- if enc.AllowMultipleValues && len(enc.barriers) == 0 {
+ if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -290,12 +279,11 @@ rehandle:
Err: fmt.Errorf("invalid character %q after top-level value", c),
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
}
enc.inputPos += int64(utf8.RuneLen(c))
- return enc.written, enc.err
}
// semi-public API /////////////////////////////////////////////////////////////
@@ -326,8 +314,8 @@ func (enc *ReEncoder) stackSize() int {
return sz
}
-func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error {
- if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" {
+func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error {
+ if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" {
return enc.handleRuneNoSpeculation(c, t)
}
@@ -341,10 +329,10 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
enc.specu.speculating = true
enc.specu.endWhenStackSize = stackSize - 1
enc.specu.fmt = ReEncoder{
- ReEncoderConfig: enc.ReEncoderConfig,
- out: &enc.specu.compact,
+ cfg: enc.cfg,
+ out: &enc.specu.compact,
}
- enc.specu.fmt.Compact = true
+ enc.specu.fmt.cfg.Compact = true
enc.specu.buf = append(enc.specu.buf, inputTuple{
c: c,
t: t,
@@ -364,18 +352,18 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
t: t,
stackSize: stackSize,
})
- if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil {
+ if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil {
return err
}
switch {
- case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent
+ case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent
buf := append([]inputTuple(nil), enc.specu.buf...)
enc.specu.Reset()
if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil {
return err
}
for _, tuple := range buf[1:] {
- if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil {
+ if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil {
return err
}
}
@@ -410,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
if enc.lastNonSpace == jsonparse.RuneTypeEOF {
switch {
case enc.wasNumber && t.IsNumber():
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
- case enc.Indent != "" && !enc.Compact:
- if err := enc.emitByte('\n'); err != nil {
+ case enc.cfg.Indent != "" && !enc.cfg.Compact:
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
}
@@ -432,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
fallthrough
default:
for enc.fracZeros > 0 {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.fracZeros--
@@ -448,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
enc.expZero = false
default:
if enc.expZero {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.expZero = false
@@ -457,11 +445,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
// whitespace
switch {
- case enc.Compact:
+ case enc.cfg.Compact:
if t == jsonparse.RuneTypeSpace {
return nil, false
}
- case enc.Indent != "":
+ case enc.cfg.Indent != "":
switch t {
case jsonparse.RuneTypeSpace:
// let us manage whitespace, don't pass it through
@@ -483,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
return err, false
}
case jsonparse.RuneTypeObjectColon:
- if err := enc.emitByte(' '); err != nil {
+ if err := enc.out.WriteByte(' '); err != nil {
return err, false
}
}
@@ -499,62 +487,58 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
// handleRuneMain handles the new rune itself, not buffered things.
func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
+ escaper := enc.cfg.BackslashEscape
+ if escaper == nil {
+ escaper = EscapeDefault
+ }
var err error
switch t {
case jsonparse.RuneTypeStringChar:
- err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))
case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU:
// do nothing
case jsonparse.RuneTypeStringEsc1:
switch c {
- case '"':
- err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape))
- case '\\':
- err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape))
- case '/':
- err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape))
+ case '"', '\\', '/':
+ // self
case 'b':
- err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\b'
case 'f':
- err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\f'
case 'n':
- err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\n'
case 'r':
- err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\r'
case 't':
- err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\t'
default:
panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c))
}
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))
case jsonparse.RuneTypeStringEscUA:
- enc.uhex[0], _ = jsonparse.HexToInt(c)
+ enc.uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
- enc.uhex[1], _ = jsonparse.HexToInt(c)
+ enc.uhex[1] = byte(c)
case jsonparse.RuneTypeStringEscUC:
- enc.uhex[2], _ = jsonparse.HexToInt(c)
+ enc.uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
- enc.uhex[3], _ = jsonparse.HexToInt(c)
- c := 0 |
- rune(enc.uhex[0])<<12 |
- rune(enc.uhex[1])<<8 |
- rune(enc.uhex[2])<<4 |
- rune(enc.uhex[3])<<0
- err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape))
+ c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))
case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()
fallthrough
case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value
enc.wasNumber = enc.lastNonSpace.IsNumber()
switch {
- case enc.ForceTrailingNewlines && len(enc.barriers) == 0:
+ case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0:
t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
- err = enc.emitByte('\n')
+ err = enc.out.WriteByte('\n')
default:
t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
}
default:
- err = enc.emitByte(byte(c))
+ err = enc.out.WriteByte(byte(c))
}
if t != jsonparse.RuneTypeSpace {
@@ -566,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
return err
}
-func (enc *ReEncoder) emitByte(c byte) error {
- err := enc.out.WriteByte(c)
- if err == nil {
- enc.written++
- }
- return err
-}
-
-func (enc *ReEncoder) emit(n int, err error) error {
- enc.written += n
- return err
-}
-
func (enc *ReEncoder) emitNlIndent() error {
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err
}
- if enc.Prefix != "" {
- if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil {
+ if enc.cfg.Prefix != "" {
+ if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil {
return err
}
}
for i := 0; i < enc.curIndent; i++ {
- if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil {
+ if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil {
return err
}
}