summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 22:30:54 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-02-16 22:30:54 -0700
commitdebef01cc500fb9368e1d6d0206a32ca358a8c98 (patch)
treef021ae7890922e10a1aa119dcdbd7dd2a587f09e
parentd7414035894f378c9e1d48b04a767f61b082186a (diff)
parentf823342d5b9c2ca376d038471889176ab74acf1b (diff)
Merge branch 'lukeshu/misc'
-rw-r--r--ReleaseNotes.md29
-rw-r--r--compat/json/compat.go43
-rw-r--r--compat/json/testcompat_test.go (renamed from compat/json/compat_test.go)12
-rw-r--r--decode.go28
-rw-r--r--encode.go19
-rw-r--r--encode_escape.go32
-rw-r--r--internal/jsonparse/hex.go20
-rw-r--r--internal/jsonparse/parse.go28
-rw-r--r--internal/jsonstring/encode_string.go (renamed from encode_string.go)59
-rw-r--r--reencode.go181
10 files changed, 226 insertions, 225 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index e00bf10..da35130 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -4,16 +4,25 @@
User-facing changes:
- - `Number` and `RawMessage` type aliases are now available, so that
- a user of lowmemjson's native APIs does not need to import
- `encoding/json` or compat/json in order to use them.
-
- - Encoder, ReEncoder: If there was an error writing to the output
- stream, it may have returned a `*ReEncodeSyntaxError` even though
- it's not a syntax issue, or may have returned the underlying
- error without wrapping it. If there is an error writing to the
- output, Encoder and ReEncoder now return `*EncodeWriteError` and
- `*ReEncodeWriteError` respectively.
+ - Change: ReEncoder: The ReEncoderConfig struct member is no longer
+ public.
+
+ - Change: ReEncoder: `WriteRune` may now be called even if there is
+ a partial UTF-8 codepoint from a `Write` or `WriteString` call,
+ but now simply returns the width of the rune, rather than the
+ number of bytes actually written.
+
+ - Feature: `Number` and `RawMessage` type aliases are now
+ available, so that a user of lowmemjson's native APIs does not
+ need to import `encoding/json` or compat/json in order to use
+ them.
+
+ - Bugfix: Encoder, ReEncoder: If there was an error writing to the
+ output stream, it may have returned a `*ReEncodeSyntaxError` even
+ though it's not a syntax issue, or may have returned the
+ underlying error without wrapping it. If there is an error
+ writing to the output, Encoder and ReEncoder now return
+ `*EncodeWriteError` and `*ReEncodeWriteError` respectively.
# v0.3.5 (2023-02-10)
diff --git a/compat/json/compat.go b/compat/json/compat.go
index 3678135..c96470d 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -72,8 +72,11 @@ func Marshal(v any) ([]byte, error) {
}
type Encoder struct {
- out io.Writer
- buf bytes.Buffer
+ out io.Writer
+ buf bytes.Buffer
+
+ cfg lowmemjson.ReEncoderConfig
+
encoder *lowmemjson.Encoder
formatter *lowmemjson.ReEncoder
}
@@ -81,17 +84,23 @@ type Encoder struct {
func NewEncoder(w io.Writer) *Encoder {
ret := &Encoder{
out: w,
- }
- ret.formatter = lowmemjson.NewReEncoder(&ret.buf, lowmemjson.ReEncoderConfig{
- AllowMultipleValues: true,
- Compact: true,
- ForceTrailingNewlines: true,
- })
- ret.encoder = lowmemjson.NewEncoder(ret.formatter)
+ cfg: lowmemjson.ReEncoderConfig{
+ AllowMultipleValues: true,
+
+ Compact: true,
+ ForceTrailingNewlines: true,
+ },
+ }
+ ret.refreshConfig()
return ret
}
+func (enc *Encoder) refreshConfig() {
+ enc.formatter = lowmemjson.NewReEncoder(&enc.buf, enc.cfg)
+ enc.encoder = lowmemjson.NewEncoder(enc.formatter)
+}
+
func (enc *Encoder) Encode(v any) error {
if err := convertEncodeError(enc.encoder.Encode(v)); err != nil {
enc.buf.Reset()
@@ -104,17 +113,19 @@ func (enc *Encoder) Encode(v any) error {
}
func (enc *Encoder) SetEscapeHTML(on bool) {
- var escaper lowmemjson.BackslashEscaper
- if !on {
- escaper = lowmemjson.EscapeDefaultNonHTMLSafe
+ if on {
+ enc.cfg.BackslashEscape = lowmemjson.EscapeDefault
+ } else {
+ enc.cfg.BackslashEscape = lowmemjson.EscapeDefaultNonHTMLSafe
}
- enc.formatter.BackslashEscape = escaper
+ enc.refreshConfig()
}
func (enc *Encoder) SetIndent(prefix, indent string) {
- enc.formatter.Compact = prefix == "" && indent == ""
- enc.formatter.Prefix = prefix
- enc.formatter.Indent = indent
+ enc.cfg.Compact = prefix == "" && indent == ""
+ enc.cfg.Prefix = prefix
+ enc.cfg.Indent = indent
+ enc.refreshConfig()
}
// ReEncode wrappers /////////////////////////////////////////////////
diff --git a/compat/json/compat_test.go b/compat/json/testcompat_test.go
index 07c75bc..42cbf5c 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/testcompat_test.go
@@ -11,8 +11,8 @@ import (
_ "unsafe"
"git.lukeshu.com/go/lowmemjson"
- "git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
"git.lukeshu.com/go/lowmemjson/internal/jsonstruct"
)
@@ -58,20 +58,14 @@ type encodeState struct {
bytes.Buffer
}
-//go:linkname encodeStringFromString git.lukeshu.com/go/lowmemjson.encodeStringFromString
-func encodeStringFromString(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str string) error
-
-//go:linkname encodeStringFromBytes git.lukeshu.com/go/lowmemjson.encodeStringFromBytes
-func encodeStringFromBytes(w fastio.AllWriter, escaper lowmemjson.BackslashEscaper, str []byte) error
-
func (es *encodeState) string(str string, _ bool) {
- if err := encodeStringFromString(&es.Buffer, nil, str); err != nil {
+ if err := jsonstring.EncodeStringFromString(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil {
panic(err)
}
}
func (es *encodeState) stringBytes(str []byte, _ bool) {
- if err := encodeStringFromBytes(&es.Buffer, nil, str); err != nil {
+ if err := jsonstring.EncodeStringFromBytes(&es.Buffer, lowmemjson.EscapeDefault, str); err != nil {
panic(err)
}
}
diff --git a/decode.go b/decode.go
index 1ff8938..8514ec4 100644
--- a/decode.go
+++ b/decode.go
@@ -1145,7 +1145,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
if err := dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp); err != nil {
return err
}
- var uhex [4]byte
+ var uhex [3]byte
for {
c, t, err := dec.readRune()
if err != nil {
@@ -1178,18 +1178,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
panic(fmt.Errorf("should not happen: unexpected rune after backslash: %q", c))
}
case jsonparse.RuneTypeStringEscUA:
- uhex[0], _ = jsonparse.HexToInt(c)
+ uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
- uhex[1], _ = jsonparse.HexToInt(c)
+ uhex[1] = byte(c)
case jsonparse.RuneTypeStringEscUC:
- uhex[2], _ = jsonparse.HexToInt(c)
+ uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
- uhex[3], _ = jsonparse.HexToInt(c)
- c = 0 |
- rune(uhex[0])<<12 |
- rune(uhex[1])<<8 |
- rune(uhex[2])<<4 |
- rune(uhex[3])<<0
+ c = hexToRune(uhex[0], uhex[1], uhex[2], byte(c))
handleUnicode:
if utf16.IsSurrogate(c) {
t, err := dec.peekRuneType()
@@ -1219,27 +1214,22 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
if err != nil {
return err
}
- uhex[0], _ = jsonparse.HexToInt(b)
+ uhex[0] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[1], _ = jsonparse.HexToInt(b)
+ uhex[1] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[2], _ = jsonparse.HexToInt(b)
+ uhex[2] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[3], _ = jsonparse.HexToInt(b)
- c2 := 0 |
- rune(uhex[0])<<12 |
- rune(uhex[1])<<8 |
- rune(uhex[2])<<4 |
- rune(uhex[3])<<0
+ c2 := hexToRune(uhex[0], uhex[1], uhex[2], byte(b))
d := utf16.DecodeRune(c, c2)
if d == utf8.RuneError {
_, _ = out.WriteRune(utf8.RuneError)
diff --git a/encode.go b/encode.go
index d39c862..2e10134 100644
--- a/encode.go
+++ b/encode.go
@@ -16,6 +16,7 @@ import (
"strings"
"unsafe"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
"git.lukeshu.com/go/lowmemjson/internal/jsonstruct"
)
@@ -82,7 +83,11 @@ func (enc *Encoder) Encode(obj any) (err error) {
if enc.isRoot {
enc.w.par.Reset()
}
- if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil {
+ escaper := enc.w.cfg.BackslashEscape
+ if escaper == nil {
+ escaper = EscapeDefault
+ }
+ if err := encode(enc.w, reflect.ValueOf(obj), escaper, false, 0, map[any]struct{}{}); err != nil {
if rwe, ok := err.(*ReEncodeWriteError); ok {
err = &EncodeWriteError{
Err: rwe.Err,
@@ -192,7 +197,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo
Err: err,
}
}
- if err := encodeStringFromBytes(w, escaper, text); err != nil {
+ if err := jsonstring.EncodeStringFromBytes(w, escaper, text); err != nil {
return err
}
default:
@@ -295,14 +300,14 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo
} else {
if quote {
var buf bytes.Buffer
- if err := encodeStringFromString(&buf, escaper, val.String()); err != nil {
+ if err := jsonstring.EncodeStringFromString(&buf, escaper, val.String()); err != nil {
return err
}
- if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil {
+ if err := jsonstring.EncodeStringFromBytes(w, escaper, buf.Bytes()); err != nil {
return err
}
} else {
- if err := encodeStringFromString(w, escaper, val.String()); err != nil {
+ if err := jsonstring.EncodeStringFromString(w, escaper, val.String()); err != nil {
return err
}
}
@@ -336,7 +341,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo
}
}
empty = false
- if err := encodeStringFromString(w, escaper, field.Name); err != nil {
+ if err := jsonstring.EncodeStringFromString(w, escaper, field.Name); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
@@ -389,7 +394,7 @@ func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote boo
}
if !strings.HasPrefix(kStr, `"`) {
k.Reset()
- if err := encodeStringFromString(&k, escaper, kStr); err != nil {
+ if err := jsonstring.EncodeStringFromString(&k, escaper, kStr); err != nil {
return err
}
kStr = k.String()
diff --git a/encode_escape.go b/encode_escape.go
index ab0d9c1..97da6e9 100644
--- a/encode_escape.go
+++ b/encode_escape.go
@@ -5,7 +5,10 @@
package lowmemjson
import (
+ "fmt"
"unicode/utf8"
+
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// BackslashEscapeMode identifies one of the three ways that a
@@ -17,14 +20,35 @@ import (
// single-character)
//
// - as a long Unicode `\uXXXX` backslash sequence
-type BackslashEscapeMode uint8
+type BackslashEscapeMode = jsonstring.BackslashEscapeMode
const (
- BackslashEscapeNone BackslashEscapeMode = iota
- BackslashEscapeShort
- BackslashEscapeUnicode
+ BackslashEscapeNone = jsonstring.BackslashEscapeNone
+ BackslashEscapeShort = jsonstring.BackslashEscapeShort
+ BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode
)
+func hexToInt(c byte) rune {
+ switch {
+ case '0' <= c && c <= '9':
+ return rune(c) - '0'
+ case 'a' <= c && c <= 'f':
+ return rune(c) - 'a' + 10
+ case 'A' <= c && c <= 'F':
+ return rune(c) - 'A' + 10
+ default:
+ panic(fmt.Errorf("should not happen: invalid hex char: %q", c))
+ }
+}
+
+func hexToRune(a, b, c, d byte) rune {
+ return 0 |
+ hexToInt(a)<<12 |
+ hexToInt(b)<<8 |
+ hexToInt(c)<<4 |
+ hexToInt(d)<<0
+}
+
// A BackslashEscaper controls how a ReEncoder emits a character in a
// JSON string. The `rune` argument is the character being
// considered, and the `BackslashEscapeMode` argument is how it was
diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go
deleted file mode 100644
index 3ed5f01..0000000
--- a/internal/jsonparse/hex.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package jsonparse
-
-const Hex = "0123456789abcdef"
-
-func HexToInt(c rune) (byte, bool) {
- switch {
- case '0' <= c && c <= '9':
- return byte(c) - '0', true
- case 'a' <= c && c <= 'f':
- return byte(c) - 'a' + 10, true
- case 'A' <= c && c <= 'F':
- return byte(c) - 'A' + 10, true
- default:
- return 0, false
- }
-}
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index 73584d9..2f5c1ab 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -14,6 +14,12 @@ import (
var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
+func isHex(c rune) bool {
+ return ('0' <= c && c <= '9') ||
+ ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F')
+}
+
// RuneType is the classification of a rune when parsing JSON input.
// A Parser, rather than grouping runes into tokens and classifying
// tokens, classifies runes directly.
@@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
}
case RuneTypeStringEscU:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUA), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUA), nil
case RuneTypeStringEscUA:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUB), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUB), nil
case RuneTypeStringEscUB:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUC), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUC), nil
case RuneTypeStringEscUC:
- if _, ok := HexToInt(c); ok {
- par.replaceState(RuneTypeStringBeg)
- return RuneTypeStringEscUD, nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEscUD, nil
// number //////////////////////////////////////////////////////////////////////////////////
//
// Here's a flattened drawing of the syntax diagram from www.json.org :
diff --git a/encode_string.go b/internal/jsonstring/encode_string.go
index 208aef4..1b0c68a 100644
--- a/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: GPL-2.0-or-later
-package lowmemjson
+package jsonstring
import (
"fmt"
@@ -10,22 +10,36 @@ import (
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/fastio"
- "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
)
-func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
+// BackslashEscapeMode is described in the main lowmemjson package
+// docs.
+type BackslashEscapeMode uint8
+
+const (
+ BackslashEscapeNone BackslashEscapeMode = iota
+ BackslashEscapeShort
+ BackslashEscapeUnicode
+)
+
+// BackslashEscaper is described in the main lowmemjson package docs.
+type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
+
+func writeStringUnicodeEscape(w io.Writer, c rune) error {
+ const alphabet = "0123456789abcdef"
buf := [6]byte{
'\\',
'u',
- jsonparse.Hex[(c>>12)&0xf],
- jsonparse.Hex[(c>>8)&0xf],
- jsonparse.Hex[(c>>4)&0xf],
- jsonparse.Hex[(c>>0)&0xf],
+ alphabet[(c>>12)&0xf],
+ alphabet[(c>>8)&0xf],
+ alphabet[(c>>4)&0xf],
+ alphabet[(c>>0)&0xf],
}
- return w.Write(buf[:])
+ _, err := w.Write(buf[:])
+ return err
}
-func writeStringShortEscape(w io.Writer, c rune) (int, error) {
+func writeStringShortEscape(w io.Writer, c rune) error {
var b byte
switch c {
case '"', '\\', '/':
@@ -44,14 +58,12 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) {
panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c))
}
buf := [2]byte{'\\', b}
- return w.Write(buf[:])
+ _, err := w.Write(buf[:])
+ return err
}
-func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) {
- if escaper == nil {
- escaper = EscapeDefault
- }
- switch escaper(c, wasEscaped) {
+func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) error {
+ switch escape {
case BackslashEscapeNone:
switch {
case c < 0x0020: // override, gotta escape these
@@ -64,19 +76,22 @@ func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode,
case c == '"' || c == '\\': // override, gotta escape these
return writeStringShortEscape(w, c)
default: // obey
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
}
case BackslashEscapeShort:
switch c {
case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
return writeStringShortEscape(w, c)
default: // override, can't short-escape these
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
}
case BackslashEscapeUnicode:
switch {
case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
- return w.WriteRune(c)
+ _, err := w.WriteRune(c)
+ return err
default: // obey
return writeStringUnicodeEscape(w, c)
}
@@ -85,12 +100,12 @@ func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode,
}
}
-func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
+func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
if err := w.WriteByte('"'); err != nil {
return err
}
for _, c := range str {
- if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil {
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
return err
}
}
@@ -100,13 +115,13 @@ func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str st
return nil
}
-func encodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
+func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
if err := w.WriteByte('"'); err != nil {
return err
}
for i := 0; i < len(str); {
c, size := utf8.DecodeRune(str[i:])
- if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil {
+ if err := WriteStringChar(w, c, escaper(c, BackslashEscapeNone)); err != nil {
return err
}
i += size
diff --git a/reencode.go b/reencode.go
index d19dc1a..f18888c 100644
--- a/reencode.go
+++ b/reencode.go
@@ -12,6 +12,7 @@ import (
"git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
+ "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// A ReEncoderConfig controls how a ReEncoder should behave.
@@ -71,32 +72,31 @@ type ReEncoderConfig struct {
// bufio.Writer.
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
return &ReEncoder{
- ReEncoderConfig: cfg,
- out: fastio.NewAllWriter(out),
- specu: new(speculation),
+ cfg: cfg,
+ out: fastio.NewAllWriter(out),
+ specu: new(speculation),
}
}
// A ReEncoder takes a stream of JSON elements (by way of implementing
-// io.Writer and WriteRune), and re-encodes the JSON, writing it to
-// the .Out member.
+// io.Writer, io.StringWriter, io.ByteWriter, and WriteRune), and
+// re-encodes the JSON, writing it to the output io.Writer.
//
// This is useful for prettifying, minifying, sanitizing, and/or
// validating JSON.
//
// The memory use of a ReEncoder is O(CompactIfUnder+depth).
type ReEncoder struct {
- ReEncoderConfig
+ cfg ReEncoderConfig
out fastio.AllWriter
- // state: .Write's and .WriteString's utf8-decoding buffer
+ // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer
buf [utf8.UTFMax]byte
bufLen int
- // state: .WriteRune
+ // state: contract between the public API and .handleRune
err error
par jsonparse.Parser
- written int
inputPos int64
// state: .handleRune
@@ -104,7 +104,7 @@ type ReEncoder struct {
lastNonSpaceNonEOF jsonparse.RuneType
wasNumber bool
curIndent int
- uhex [4]byte // "\uABCD"-encoded characters in strings
+ uhex [3]byte // "\uABCD"-encoded characters in strings
fracZeros int64
expZero bool
specu *speculation
@@ -165,14 +165,16 @@ func (enc *ReEncoder) Write(p []byte) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRune(p[n:]) {
c, size := utf8.DecodeRune(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
@@ -192,18 +194,19 @@ func (enc *ReEncoder) WriteString(p string) (int, error) {
c, size := utf8.DecodeRune(enc.buf[:])
n += size - enc.bufLen
enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return 0, enc.err
}
}
for utf8.FullRuneInString(p[n:]) {
c, size := utf8.DecodeRuneInString(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
+ enc.handleRune(c)
+ if enc.err != nil {
+ return n, enc.err
}
n += size
}
- enc.bufLen = copy(enc.buf[:], p[n:])
return len(p), nil
}
@@ -212,6 +215,11 @@ func (enc *ReEncoder) WriteByte(b byte) error {
return fastio.WriteByte(enc, b)
}
+// WriteRune does what you'd expect.
+func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
+ return fastio.WriteRune(enc, c)
+}
+
// Close implements io.Closer; it does what you'd expect, mostly.
//
// The *ReEncoder may continue to be written to with new JSON values
@@ -231,40 +239,21 @@ func (enc *ReEncoder) Close() error {
return enc.err
}
if len(enc.barriers) == 0 {
- if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
return enc.err
}
- if enc.AllowMultipleValues {
+ if enc.cfg.AllowMultipleValues {
enc.par.Reset()
}
}
return nil
}
-// WriteRune writes a single Unicode code point, returning the number
-// of bytes written to the output stream and any error.
-//
-// Even when there is no error, the number of bytes written may be
-// zero (for example, when the rune is whitespace and the ReEncoder is
-// minifying the JSON), or it may be substantially longer than one
-// code point's worth (for example, when `\uXXXX` escaping a character
-// in a string, or when outputing extra whitespace when the ReEncoder
-// is prettifying the JSON).
-func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
- if enc.err != nil {
- return 0, enc.err
- }
- if enc.bufLen > 0 {
- enc.err = fmt.Errorf("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()en: %q", enc.buf[:enc.bufLen])
- return 0, enc.err
- }
-
- enc.written = 0
-
+func (enc *ReEncoder) handleRune(c rune) {
rehandle:
t, err := enc.par.HandleRune(c)
if err != nil {
@@ -272,17 +261,17 @@ rehandle:
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
- if err := enc.handleRune(c, t, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(c, t, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
if t == jsonparse.RuneTypeEOF {
- if enc.AllowMultipleValues && len(enc.barriers) == 0 {
+ if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -290,12 +279,11 @@ rehandle:
Err: fmt.Errorf("invalid character %q after top-level value", c),
Offset: enc.inputPos,
}
- return enc.written, enc.err
+ return
}
}
enc.inputPos += int64(utf8.RuneLen(c))
- return enc.written, enc.err
}
// semi-public API /////////////////////////////////////////////////////////////
@@ -326,8 +314,8 @@ func (enc *ReEncoder) stackSize() int {
return sz
}
-func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error {
- if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" {
+func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error {
+ if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" {
return enc.handleRuneNoSpeculation(c, t)
}
@@ -341,10 +329,10 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
enc.specu.speculating = true
enc.specu.endWhenStackSize = stackSize - 1
enc.specu.fmt = ReEncoder{
- ReEncoderConfig: enc.ReEncoderConfig,
- out: &enc.specu.compact,
+ cfg: enc.cfg,
+ out: &enc.specu.compact,
}
- enc.specu.fmt.Compact = true
+ enc.specu.fmt.cfg.Compact = true
enc.specu.buf = append(enc.specu.buf, inputTuple{
c: c,
t: t,
@@ -364,18 +352,18 @@ func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) er
t: t,
stackSize: stackSize,
})
- if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil {
+ if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil {
return err
}
switch {
- case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent
+ case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent
buf := append([]inputTuple(nil), enc.specu.buf...)
enc.specu.Reset()
if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil {
return err
}
for _, tuple := range buf[1:] {
- if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil {
+ if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil {
return err
}
}
@@ -410,11 +398,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
if enc.lastNonSpace == jsonparse.RuneTypeEOF {
switch {
case enc.wasNumber && t.IsNumber():
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
- case enc.Indent != "" && !enc.Compact:
- if err := enc.emitByte('\n'); err != nil {
+ case enc.cfg.Indent != "" && !enc.cfg.Compact:
+ if err := enc.out.WriteByte('\n'); err != nil {
return err, false
}
}
@@ -432,7 +420,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
fallthrough
default:
for enc.fracZeros > 0 {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.fracZeros--
@@ -448,7 +436,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
enc.expZero = false
default:
if enc.expZero {
- if err := enc.emitByte('0'); err != nil {
+ if err := enc.out.WriteByte('0'); err != nil {
return err, false
}
enc.expZero = false
@@ -457,11 +445,11 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
// whitespace
switch {
- case enc.Compact:
+ case enc.cfg.Compact:
if t == jsonparse.RuneTypeSpace {
return nil, false
}
- case enc.Indent != "":
+ case enc.cfg.Indent != "":
switch t {
case jsonparse.RuneTypeSpace:
// let us manage whitespace, don't pass it through
@@ -483,7 +471,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
return err, false
}
case jsonparse.RuneTypeObjectColon:
- if err := enc.emitByte(' '); err != nil {
+ if err := enc.out.WriteByte(' '); err != nil {
return err, false
}
}
@@ -499,62 +487,58 @@ func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool)
// handleRuneMain handles the new rune itself, not buffered things.
func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
+ escaper := enc.cfg.BackslashEscape
+ if escaper == nil {
+ escaper = EscapeDefault
+ }
var err error
switch t {
case jsonparse.RuneTypeStringChar:
- err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))
case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU:
// do nothing
case jsonparse.RuneTypeStringEsc1:
switch c {
- case '"':
- err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape))
- case '\\':
- err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape))
- case '/':
- err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape))
+ case '"', '\\', '/':
+ // self
case 'b':
- err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\b'
case 'f':
- err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\f'
case 'n':
- err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\n'
case 'r':
- err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\r'
case 't':
- err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape))
+ c = '\t'
default:
panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c))
}
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))
case jsonparse.RuneTypeStringEscUA:
- enc.uhex[0], _ = jsonparse.HexToInt(c)
+ enc.uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
- enc.uhex[1], _ = jsonparse.HexToInt(c)
+ enc.uhex[1] = byte(c)
case jsonparse.RuneTypeStringEscUC:
- enc.uhex[2], _ = jsonparse.HexToInt(c)
+ enc.uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
- enc.uhex[3], _ = jsonparse.HexToInt(c)
- c := 0 |
- rune(enc.uhex[0])<<12 |
- rune(enc.uhex[1])<<8 |
- rune(enc.uhex[2])<<4 |
- rune(enc.uhex[3])<<0
- err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape))
+ c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
+ err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))
case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()
fallthrough
case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value
enc.wasNumber = enc.lastNonSpace.IsNumber()
switch {
- case enc.ForceTrailingNewlines && len(enc.barriers) == 0:
+ case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0:
t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
- err = enc.emitByte('\n')
+ err = enc.out.WriteByte('\n')
default:
t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
}
default:
- err = enc.emitByte(byte(c))
+ err = enc.out.WriteByte(byte(c))
}
if t != jsonparse.RuneTypeSpace {
@@ -566,30 +550,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
return err
}
-func (enc *ReEncoder) emitByte(c byte) error {
- err := enc.out.WriteByte(c)
- if err == nil {
- enc.written++
- }
- return err
-}
-
-func (enc *ReEncoder) emit(n int, err error) error {
- enc.written += n
- return err
-}
-
func (enc *ReEncoder) emitNlIndent() error {
- if err := enc.emitByte('\n'); err != nil {
+ if err := enc.out.WriteByte('\n'); err != nil {
return err
}
- if enc.Prefix != "" {
- if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil {
+ if enc.cfg.Prefix != "" {
+ if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil {
return err
}
}
for i := 0; i < enc.curIndent; i++ {
- if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil {
+ if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil {
return err
}
}