diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lowmemjson/adapter_test.go | 19 | ||||
-rw-r--r-- | lib/lowmemjson/borrowed_tables.go | 115 | ||||
-rw-r--r-- | lib/lowmemjson/encode.go | 235 | ||||
-rw-r--r-- | lib/lowmemjson/misc.go | 130 | ||||
-rw-r--r-- | lib/lowmemjson/reencode.go | 247 |
5 files changed, 321 insertions, 425 deletions
diff --git a/lib/lowmemjson/adapter_test.go b/lib/lowmemjson/adapter_test.go index fd300e4..7c08459 100644 --- a/lib/lowmemjson/adapter_test.go +++ b/lib/lowmemjson/adapter_test.go @@ -27,18 +27,23 @@ var ( func MarshalIndent(v any, prefix, indent string) ([]byte, error) { var buf bytes.Buffer - err := Encode(&buf, v, prefix, indent) + formatter := &ReEncoder{ + Out: &buf, + Indent: indent, + prefix: prefix, + } + err := Encode(formatter, v) return buf.Bytes(), err } func Marshal(v any) ([]byte, error) { - big, err := MarshalIndent(v, "", "") - if err != nil { - return nil, err + var buf bytes.Buffer + formatter := &ReEncoder{ + Out: &buf, + Compact: true, } - var small bytes.Buffer - err = json.Compact(&small, big) - return small.Bytes(), err + err := Encode(formatter, v) + return buf.Bytes(), err } var Unmarshal = json.Unmarshal // TODO diff --git a/lib/lowmemjson/borrowed_tables.go b/lib/lowmemjson/borrowed_tables.go deleted file mode 100644 index 5c26148..0000000 --- a/lib/lowmemjson/borrowed_tables.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package lowmemjson - -import "unicode/utf8" - -const hex = "0123456789abcdef" - -// htmlSafeSet holds the value true if the ASCII character with the given -// array position can be safely represented inside a JSON string, embedded -// inside of HTML <script> tags, without any additional escaping. -// -// All values are true except for the ASCII control characters (0-31), the -// double quote ("), the backslash character ("\"), HTML opening and closing -// tags ("<" and ">"), and the ampersand ("&"). 
-var htmlSafeSet = [utf8.RuneSelf]bool{ - ' ': true, - '!': true, - '"': false, - '#': true, - '$': true, - '%': true, - '&': false, - '\'': true, - '(': true, - ')': true, - '*': true, - '+': true, - ',': true, - '-': true, - '.': true, - '/': true, - '0': true, - '1': true, - '2': true, - '3': true, - '4': true, - '5': true, - '6': true, - '7': true, - '8': true, - '9': true, - ':': true, - ';': true, - '<': false, - '=': true, - '>': false, - '?': true, - '@': true, - 'A': true, - 'B': true, - 'C': true, - 'D': true, - 'E': true, - 'F': true, - 'G': true, - 'H': true, - 'I': true, - 'J': true, - 'K': true, - 'L': true, - 'M': true, - 'N': true, - 'O': true, - 'P': true, - 'Q': true, - 'R': true, - 'S': true, - 'T': true, - 'U': true, - 'V': true, - 'W': true, - 'X': true, - 'Y': true, - 'Z': true, - '[': true, - '\\': false, - ']': true, - '^': true, - '_': true, - '`': true, - 'a': true, - 'b': true, - 'c': true, - 'd': true, - 'e': true, - 'f': true, - 'g': true, - 'h': true, - 'i': true, - 'j': true, - 'k': true, - 'l': true, - 'm': true, - 'n': true, - 'o': true, - 'p': true, - 'q': true, - 'r': true, - 's': true, - 't': true, - 'u': true, - 'v': true, - 'w': true, - 'x': true, - 'y': true, - 'z': true, - '{': true, - '|': true, - '}': true, - '~': true, - '\u007f': true, -} diff --git a/lib/lowmemjson/encode.go b/lib/lowmemjson/encode.go index 1fbd228..3f671ea 100644 --- a/lib/lowmemjson/encode.go +++ b/lib/lowmemjson/encode.go @@ -13,47 +13,29 @@ import ( "sort" "strconv" "strings" - "unicode/utf8" ) -const Tab = "\t" - type Encoder interface { - EncodeJSON(w io.Writer, prefix, indent string) error + EncodeJSON(w io.Writer) error } type encodeError struct { Err error } -func writeByte(w io.Writer, b byte) { - var buf [1]byte - buf[0] = b - if _, err := w.Write(buf[:]); err != nil { +func encodeWriteByte(w io.Writer, b byte) { + if err := writeByte(w, b); err != nil { panic(encodeError{err}) } } -func writeString(w io.Writer, str string) { +func 
encodeWriteString(w io.Writer, str string) { if _, err := io.WriteString(w, str); err != nil { panic(encodeError{err}) } } -func writeBytes[T interface{ ~[]byte | ~string }](w io.Writer, seq T) { - iface := any(seq) - if str, ok := iface.(string); ok { - if _, err := io.WriteString(w, str); err != nil { - panic(encodeError{err}) - } - } else { - if _, err := w.Write(iface.([]byte)); err != nil { - panic(encodeError{err}) - } - } -} - -func Encode(w io.Writer, obj any, prefix, indent string) (err error) { +func Encode(w io.Writer, obj any) (err error) { defer func() { if r := recover(); r != nil { if e, ok := r.(encodeError); ok { @@ -63,20 +45,22 @@ func Encode(w io.Writer, obj any, prefix, indent string) (err error) { } } }() - encode(w, reflect.ValueOf(obj), prefix, indent, false) + encode(w, reflect.ValueOf(obj), false) + if f, ok := w.(interface{ Flush() error }); ok { + return f.Flush() + } return nil } var ( - numberType = reflect.TypeOf(json.Number("")) encoderType = reflect.TypeOf((*Encoder)(nil)).Elem() jsonMarshalerType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() textMarshalerType = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem() ) -func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { +func encode(w io.Writer, val reflect.Value, quote bool) { if !val.IsValid() { - writeString(w, "null") + encodeWriteString(w, "null") return } switch { @@ -86,15 +70,15 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { fallthrough case val.Type().Implements(encoderType): if val.Kind() == reflect.Pointer && val.IsNil() { - writeString(w, "null") + encodeWriteString(w, "null") return } obj, ok := val.Interface().(Encoder) if !ok { - writeString(w, "null") + encodeWriteString(w, "null") return } - if err := obj.EncodeJSON(w, prefix, indent); err != nil { + if err := obj.EncodeJSON(w); err != nil { panic(encodeError{err}) } @@ -103,12 +87,12 @@ func encode(w io.Writer, val reflect.Value, prefix, indent 
string, quote bool) { fallthrough case val.Type().Implements(jsonMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - writeString(w, "null") + encodeWriteString(w, "null") return } obj, ok := val.Interface().(json.Marshaler) if !ok { - writeString(w, "null") + encodeWriteString(w, "null") return } dat, err := obj.MarshalJSON() @@ -124,12 +108,12 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { fallthrough case val.Type().Implements(textMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - writeString(w, "null") + encodeWriteString(w, "null") return } obj, ok := val.Interface().(encoding.TextMarshaler) if !ok { - writeString(w, "null") + encodeWriteString(w, "null") return } text, err := obj.MarshalText() @@ -142,39 +126,39 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { switch val.Kind() { case reflect.Bool: if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } if val.Bool() { - writeString(w, "true") + encodeWriteString(w, "true") } else { - writeString(w, "false") + encodeWriteString(w, "false") } if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } - writeString(w, strconv.FormatInt(val.Int(), 10)) + encodeWriteString(w, strconv.FormatInt(val.Int(), 10)) if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } - writeString(w, strconv.FormatUint(val.Uint(), 10)) + encodeWriteString(w, strconv.FormatUint(val.Uint(), 10)) if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } case reflect.Float32, reflect.Float64: if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } - encodeTODO(w, val, prefix, indent) + encodeTODO(w, val) if quote { - writeByte(w, '"') + 
encodeWriteByte(w, '"') } case reflect.String: if val.Type() == numberType { @@ -183,11 +167,11 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { numStr = "0" } if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } - writeString(w, numStr) + encodeWriteString(w, numStr) if quote { - writeByte(w, '"') + encodeWriteByte(w, '"') } } else { if quote { @@ -201,11 +185,10 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { case reflect.Interface: // .Kind() will only be reflect.Interface if // there's no concrete type. - writeString(w, "null") + encodeWriteString(w, "null") case reflect.Struct: - writeByte(w, '{') - var numFields int - subPrefix := prefix + indent + encodeWriteByte(w, '{') + empty := true nextStructField: for _, field := range indexStruct(val.Type()) { fVal := val @@ -221,31 +204,25 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { if field.OmitEmpty && isEmptyValue(fVal) { continue nextStructField } - if numFields > 0 { - writeByte(w, ',') + if !empty { + encodeWriteByte(w, ',') } - numFields++ - writeByte(w, '\n') - writeString(w, subPrefix) + empty = false encodeString(w, field.Name) - writeString(w, ": ") - encode(w, fVal, subPrefix, indent, field.Quote) + encodeWriteByte(w, ':') + encode(w, fVal, field.Quote) } - if numFields > 0 { - writeByte(w, '\n') - writeString(w, prefix) - } - writeByte(w, '}') + encodeWriteByte(w, '}') case reflect.Map: if val.IsNil() { - writeString(w, "null") + encodeWriteString(w, "null") return } if val.Len() == 0 { - writeString(w, "{}") + encodeWriteString(w, "{}") return } - writeByte(w, '{') + encodeWriteByte(w, '{') type kv struct { K string @@ -255,7 +232,7 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { iter := val.MapRange() for i := 0; iter.Next(); i++ { var k strings.Builder - encode(&k, iter.Key(), "", Tab, false) + encode(&k, iter.Key(), false) kStr := k.String() if kStr 
== "null" { kStr = `""` @@ -267,36 +244,31 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { return kvs[i].K < kvs[j].K }) - subPrefix := prefix + indent for i, kv := range kvs { if i > 0 { - writeByte(w, ',') + encodeWriteByte(w, ',') } - writeByte(w, '\n') - writeString(w, subPrefix) - writeString(w, kv.K) - writeString(w, ": ") - encode(w, kv.V, subPrefix, indent, false) + encodeWriteString(w, kv.K) + encodeWriteByte(w, ':') + encode(w, kv.V, false) } - writeByte(w, '\n') - writeString(w, prefix) - writeByte(w, '}') + encodeWriteByte(w, '}') case reflect.Slice: switch { case val.Type().Elem().Kind() == reflect.Uint8: - encodeTODO(w, val, prefix, indent) + encodeTODO(w, val) case val.IsNil(): - writeString(w, "null") + encodeWriteString(w, "null") default: - encodeArray(w, val, prefix, indent) + encodeArray(w, val) } case reflect.Array: - encodeArray(w, val, prefix, indent) + encodeArray(w, val) case reflect.Pointer: if val.IsNil() { - writeString(w, "null") + encodeWriteString(w, "null") } else { - encode(w, val.Elem(), prefix, indent, quote) + encode(w, val.Elem(), quote) } default: panic(encodeError{&json.UnsupportedTypeError{ @@ -306,97 +278,28 @@ func encode(w io.Writer, val reflect.Value, prefix, indent string, quote bool) { } } -func decodeRune[T interface{ ~[]byte | ~string }](s T) (r rune, size int) { - iface := any(s) - if str, ok := iface.(string); ok { - return utf8.DecodeRuneInString(str) - } else { - return utf8.DecodeRune(iface.([]byte)) - } -} - -// might as well be borrowed func encodeString[T interface{ ~[]byte | ~string }](w io.Writer, str T) { - writeByte(w, '"') - start := 0 + encodeWriteByte(w, '"') for i := 0; i < len(str); { - if b := str[i]; b < utf8.RuneSelf { - if htmlSafeSet[b] { - i++ - continue - } - if start < i { - writeBytes(w, str[start:i]) - } - writeByte(w, '\\') - switch b { - case '\\', '"': - writeByte(w, b) - case '\n': - writeByte(w, 'n') - case '\r': - writeByte(w, 'r') - case '\t': - 
writeByte(w, 't') - default: - writeString(w, `u00`) - writeByte(w, hex[b>>4]) - writeByte(w, hex[b&0xF]) - } - i++ - start = i - continue - } c, size := decodeRune(str[i:]) - if c == utf8.RuneError && size == 1 { - if start < i { - writeBytes(w, str[start:i]) - } - writeString(w, `\ufffd`) - i += size - start = i - continue - } - // U+2028 is LINE SEPARATOR. - // U+2029 is PARAGRAPH SEPARATOR. - // They are both technically valid characters in JSON strings, - // but don't work in JSONP, which has to be evaluated as JavaScript, - // and can lead to security holes there. It is valid JSON to - // escape them, so we do so unconditionally. - // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. - if c == '\u2028' || c == '\u2029' { - if start < i { - writeBytes(w, str[start:i]) - } - writeString(w, `\u202`) - writeByte(w, hex[c&0xF]) - i += size - start = i - continue + if err := writeStringChar(w, c, false, nil); err != nil { + panic(encodeError{err}) } i += size } - if start < len(str) { - writeBytes(w, str[start:]) - } - writeByte(w, '"') + encodeWriteByte(w, '"') } -func encodeArray(w io.Writer, val reflect.Value, prefix, indent string) { - subPrefix := prefix + indent - writeByte(w, '[') +func encodeArray(w io.Writer, val reflect.Value) { + encodeWriteByte(w, '[') n := val.Len() for i := 0; i < n; i++ { if i > 0 { - writeByte(w, ',') + encodeWriteByte(w, ',') } - writeByte(w, '\n') - writeString(w, subPrefix) - encode(w, val.Index(i), subPrefix, indent, false) + encode(w, val.Index(i), false) } - writeByte(w, '\n') - writeString(w, prefix) - writeByte(w, ']') + encodeWriteByte(w, ']') } // borrowed @@ -418,8 +321,8 @@ func isEmptyValue(v reflect.Value) bool { return false } -func encodeTODO(w io.Writer, val reflect.Value, prefix, indent string) { - bs, err := json.MarshalIndent(val.Interface(), prefix, indent) +func encodeTODO(w io.Writer, val reflect.Value) { + bs, err := json.Marshal(val.Interface()) if err != nil { panic(encodeError{err}) 
} diff --git a/lib/lowmemjson/misc.go b/lib/lowmemjson/misc.go new file mode 100644 index 0000000..d1f8f25 --- /dev/null +++ b/lib/lowmemjson/misc.go @@ -0,0 +1,130 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "encoding/json" + "io" + "reflect" + "unicode/utf8" +) + +const Tab = "\t" + +const hex = "0123456789abcdef" + +var numberType = reflect.TypeOf(json.Number("")) + +// generic I/O ///////////////////////////////////////////////////////////////// + +func decodeRune[T interface{ ~[]byte | ~string }](s T) (r rune, size int) { + iface := any(s) + if str, ok := iface.(string); ok { + return utf8.DecodeRuneInString(str) + } else { + return utf8.DecodeRune(iface.([]byte)) + } +} + +func writeByte(w io.Writer, c byte) error { + if br, ok := w.(interface{ WriteByte(byte) error }); ok { + return br.WriteByte(c) + } + var buf [1]byte + buf[0] = c + if _, err := w.Write(buf[:]); err != nil { + return err + } + return nil +} + +func writeRune(w io.Writer, c rune) (int, error) { + if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { + return rw.WriteRune(c) + } + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], c) + return w.Write(buf[:n]) +} + +// JSON string encoding //////////////////////////////////////////////////////// + +func UnicodeEscapeJSSafe(c rune, _ bool) bool { + // JSON is notionally a JS subset, but that's not actually + // true. 
+ // + // http://timelessrepo.com/json-isnt-a-javascript-subset + switch c { + case '\u2028', '\u2029': + return true + default: + return false + } +} + +func UnicodeEscapeHTMLSafe(c rune, wasEscaped bool) bool { + switch c { + case '&', '<', '>': + return true + default: + return UnicodeEscapeJSSafe(c, wasEscaped) + } +} + +func UnicodeEscapeDefault(c rune, wasEscaped bool) bool { + switch c { + case '\b', '\f': + return true + default: + return UnicodeEscapeHTMLSafe(c, wasEscaped) + } +} + +func writeStringUnicodeEscape(w io.Writer, c rune) error { + buf := [6]byte{ + '\\', + 'u', + hex[(c>>12)&0xf], + hex[(c>>8)&0xf], + hex[(c>>4)&0xf], + hex[(c>>0)&0xf], + } + _, err := w.Write(buf[:]) + return err +} +func writeStringShortEscape(w io.Writer, c byte) error { + buf := [2]byte{'\\', c} + _, err := w.Write(buf[:]) + return err +} +func writeStringChar(w io.Writer, c rune, wasEscaped bool, escaper func(rune, bool) bool) error { + if escaper == nil { + escaper = UnicodeEscapeDefault + } + switch { + case c <= 0xFFFF && escaper(c, wasEscaped): + return writeStringUnicodeEscape(w, c) + case c == '"' || c == '\\': + return writeStringShortEscape(w, byte(c)) + case c < 0x0020: + switch c { + case '\b': + return writeStringShortEscape(w, 'b') + case '\f': + return writeStringShortEscape(w, 'f') + case '\n': + return writeStringShortEscape(w, 'n') + case '\r': + return writeStringShortEscape(w, 'r') + case '\t': + return writeStringShortEscape(w, 't') + default: + return writeStringUnicodeEscape(w, c) + } + default: + _, err := writeRune(w, c) + return err + } +} diff --git a/lib/lowmemjson/reencode.go b/lib/lowmemjson/reencode.go index c295d2c..721b0b9 100644 --- a/lib/lowmemjson/reencode.go +++ b/lib/lowmemjson/reencode.go @@ -7,29 +7,32 @@ package lowmemjson import ( "errors" "fmt" + "io" "unicode/utf8" ) type reencodeState func(rune) error type ReEncoder struct { - Out interface { - WriteRune(rune) error - WriteByte(byte) error - WriteString(string) (int, error) - } + 
Out io.Writer // Whether to minify the JSON. - Compact bool + Compact bool // String to use to indent; ignored if Compact is true. - Indent string + Indent string + // String to put before indents, for testing-compat with + // encoding/json only. + prefix string // Returns whether a given character in a string should be // "\uXXXX" escaped. The bool argument is whether it was - // \u-escaped in the input. + // \u-escaped in the input. This does not affect characters + // that must or must-not be \u-escaped to be valid JSON. + // + // If not set, then UnicodeEscapeDefault is used. UnicodeEscape func(rune, bool) bool // state: .Write's utf8-decoding buffer - buf [4]byte + buf [utf8.UTFMax]byte bufLen int // state: .WriteRune @@ -42,6 +45,8 @@ type ReEncoder struct { stateBuf []byte } +// public API ////////////////////////////////////////////////////////////////// + func (enc *ReEncoder) Write(p []byte) (int, error) { if len(p) == 0 { return 0, nil @@ -63,7 +68,7 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { } n += size } - enc.bufLen = copy(enc.buf[:], p) + enc.bufLen = copy(enc.buf[:], p[n:]) return n, nil } @@ -100,6 +105,30 @@ func (enc *ReEncoder) WriteRune(c rune) (err error) { return enc.state(c) } +// io helpers ////////////////////////////////////////////////////////////////// + +func (enc *ReEncoder) nlIndent() error { + if enc.Compact || enc.Indent == "" { + return nil + } + if err := writeByte(enc.Out, '\n'); err != nil { + return err + } + if enc.prefix != "" { + if _, err := io.WriteString(enc.Out, enc.prefix); err != nil { + return err + } + } + for i := 0; i < enc.curIndent; i++ { + if _, err := io.WriteString(enc.Out, enc.Indent); err != nil { + return err + } + } + return nil +} + +// state helpers /////////////////////////////////////////////////////////////// + func (enc *ReEncoder) pushState(state reencodeState, isNumber bool) { if len(enc.stack) == 0 { enc.stack0IsNumber = isNumber @@ -126,21 +155,6 @@ func (enc *ReEncoder) state(c 
rune) error { return enc.stack[len(enc.stack)-1](c) } -func (enc *ReEncoder) nlIndent() error { - if enc.Compact || enc.Indent == "" { - return nil - } - if err := enc.Out.WriteByte('\n'); err != nil { - return err - } - for i := 0; i < enc.curIndent; i++ { - if _, err := enc.Out.WriteString(enc.Indent); err != nil { - return err - } - } - return nil -} - // any ///////////////////////////////////////////////////////////////////////////////////////////// func (enc *ReEncoder) stateAny(c rune) error { @@ -170,9 +184,9 @@ func (enc *ReEncoder) stateAny(c rune) error { enc.replaceState(enc.stateInNull, false) enc.stateBuf = append(enc.stateBuf[:0], 'n') default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode value: unexpected character: %c", c) } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } // object ////////////////////////////////////////////////////////////////////////////////////////// @@ -200,9 +214,9 @@ func (enc *ReEncoder) _stateInObject(c rune, nonempty bool) error { } } default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode object: unexpected character: %c", c) } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } func (enc *ReEncoder) stateInKV(c rune) error { switch c { @@ -210,13 +224,20 @@ func (enc *ReEncoder) stateInKV(c rune) error { if enc.Compact || enc.Indent != "" { return nil } + return writeByte(enc.Out, byte(c)) case ':': enc.replaceState(enc.stateAfterV, false) enc.pushState(enc.stateAny, false) + if err := writeByte(enc.Out, byte(c)); err != nil { + return err + } + if !enc.Compact && enc.Indent != "" { + return writeByte(enc.Out, ' ') + } + return nil default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode object member: unexpected character: %c", c) } - return enc.Out.WriteRune(c) } func (enc *ReEncoder) stateAfterV(c rune) error { switch c { @@ -233,9 +254,9 @@ func (enc *ReEncoder) stateAfterV(c rune) 
error { return err } default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode object member: unexpected character: %c", c) } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } // array /////////////////////////////////////////////////////////////////////////////////////////// @@ -264,7 +285,7 @@ func (enc *ReEncoder) _stateInArray(c rune, nonempty bool) error { enc.pushState(enc.stateAny, false) return enc.state(c) } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } func (enc *ReEncoder) stateAfterItem(c rune) error { switch c { @@ -281,69 +302,13 @@ func (enc *ReEncoder) stateAfterItem(c rune) error { return err } default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode array: unexpected character: %c", c) } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } // string ////////////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) emitStringUnicodeEscape(c rune) error { - if err := enc.Out.WriteByte('\\'); err != nil { - return err - } - if err := enc.Out.WriteByte('u'); err != nil { - return err - } - if err := enc.Out.WriteByte(hex[(c>>24)&0xff]); err != nil { - return err - } - if err := enc.Out.WriteByte(hex[(c>>16)&0xff]); err != nil { - return err - } - if err := enc.Out.WriteByte(hex[(c>>8)&0xff]); err != nil { - return err - } - if err := enc.Out.WriteByte(hex[(c>>0)&0xff]); err != nil { - return err - } - return nil -} -func (enc *ReEncoder) emitStringShortEscape(c byte) error { - if err := enc.Out.WriteByte('\\'); err != nil { - return err - } - if err := enc.Out.WriteByte(c); err != nil { - return err - } - return nil -} -func (enc *ReEncoder) emitStringChar(c rune) error { - switch { - case enc.UnicodeEscape(c, false): - return enc.emitStringUnicodeEscape(c) - case c == '"' || c == '\\': - return enc.emitStringShortEscape(byte(c)) - case c < 0x0020 || c > 0x10FFFF: - switch c { - case 
'\b': - return enc.emitStringShortEscape('b') - case '\f': - return enc.emitStringShortEscape('f') - case '\n': - return enc.emitStringShortEscape('n') - case '\r': - return enc.emitStringShortEscape('r') - case '\t': - return enc.emitStringShortEscape('t') - default: - return enc.emitStringUnicodeEscape(c) - } - default: - return enc.Out.WriteRune(c) - } -} - func (enc *ReEncoder) stateInString(c rune) error { switch { case c == '\\': @@ -351,34 +316,41 @@ func (enc *ReEncoder) stateInString(c rune) error { return nil case c == '"': enc.popState() - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) case 0x0020 <= c && c <= 0x10FFFF: - return enc.emitStringChar(c) + return writeStringChar(enc.Out, c, false, enc.UnicodeEscape) default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode string: unexpected character: %c", c) } } func (enc *ReEncoder) stateInBackslash(c rune) error { switch c { case '"': - return enc.emitStringChar('"') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '"', false, enc.UnicodeEscape) case '\\': - return enc.emitStringChar('\\') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '\\', false, enc.UnicodeEscape) case 'b': - return enc.emitStringChar('\b') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '\b', false, enc.UnicodeEscape) case 'f': - return enc.emitStringChar('\f') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '\f', false, enc.UnicodeEscape) case 'n': - return enc.emitStringChar('\n') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '\n', false, enc.UnicodeEscape) case 'r': - return enc.emitStringChar('\r') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, '\r', false, enc.UnicodeEscape) case 't': - return enc.emitStringChar('\t') + enc.replaceState(enc.stateInString, false) + return writeStringChar(enc.Out, 
'\t', false, enc.UnicodeEscape) case 'u': enc.replaceState(enc.stateInUnicode, false) return nil default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode backslash sequence: unexpected character: %c", c) } } func (enc *ReEncoder) stateInUnicode(c rune) error { @@ -386,21 +358,21 @@ func (enc *ReEncoder) stateInUnicode(c rune) error { case '0' <= c && c <= '9': enc.stateBuf = append(enc.stateBuf, byte(c)-'0') case 'a' <= c && c <= 'f': - enc.stateBuf = append(enc.stateBuf, byte(c)-'a') + enc.stateBuf = append(enc.stateBuf, byte(c)-'a'+10) case 'A' <= c && c <= 'F': - enc.stateBuf = append(enc.stateBuf, byte(c)-'A') + enc.stateBuf = append(enc.stateBuf, byte(c)-'A'+10) default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode backslash sequence: unexpected character: %c", c) } if len(enc.stateBuf) == 4 { enc.replaceState(enc.stateInString, false) c := 0 | - rune(enc.stateBuf[0])<<24 | - rune(enc.stateBuf[1])<<16 | - rune(enc.stateBuf[2])<<8 | + rune(enc.stateBuf[0])<<12 | + rune(enc.stateBuf[1])<<8 | + rune(enc.stateBuf[2])<<4 | rune(enc.stateBuf[3])<<0 enc.stateBuf = enc.stateBuf[:0] - return enc.emitStringChar(c) + return writeStringChar(enc.Out, c, true, enc.UnicodeEscape) } return nil } @@ -456,9 +428,9 @@ func (enc *ReEncoder) stateNumberA(c rune) error { // start case '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberD, true) default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode number: unexpected character: %c", c) } - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) } func (enc *ReEncoder) stateNumberB(c rune) error { // got a leading "-" switch c { @@ -467,19 +439,19 @@ func (enc *ReEncoder) stateNumberB(c rune) error { // got a leading "-" case '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberD, true) default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode 
number: unexpected character: %c", c) } - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) } func (enc *ReEncoder) stateNumberC(c rune) error { // ready for the fraction or exponent part to start switch c { case '.': enc.replaceState(enc.stateNumberE, true) - return enc.Out.WriteByte('.') + return writeByte(enc.Out, '.') case 'e', 'E': enc.replaceState(enc.stateNumberG, true) enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.Out.WriteByte('e') + return writeByte(enc.Out, 'e') default: enc.popState() return enc.state(c) @@ -488,14 +460,14 @@ func (enc *ReEncoder) stateNumberC(c rune) error { // ready for the fraction or func (enc *ReEncoder) stateNumberD(c rune) error { // in the integer part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) case '.': enc.replaceState(enc.stateNumberE, true) - return enc.Out.WriteByte('.') + return writeByte(enc.Out, '.') case 'e', 'E': enc.replaceState(enc.stateNumberG, true) enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.Out.WriteByte('e') + return writeByte(enc.Out, 'e') default: enc.popState() return enc.state(c) @@ -507,9 +479,9 @@ func (enc *ReEncoder) stateNumberE(c rune) error { // got a ".", ready to read a switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberF, true) - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) default: - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode number: unexpected character: %c", c) } } func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part @@ -523,7 +495,7 @@ func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part return nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': for len(enc.stateBuf) > 0 { - if err := enc.Out.WriteByte('0'); err != nil { + if err := writeByte(enc.Out, '0'); err != nil { return err } if 
enc.stateBuf[len(enc.stateBuf)-1] == 1 { @@ -532,10 +504,11 @@ func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part enc.stateBuf[len(enc.stateBuf)-1]-- } } - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) case 'e', 'E': + enc.replaceState(enc.stateNumberG, true) enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.Out.WriteByte('e') + return writeByte(enc.Out, 'e') default: enc.stateBuf = enc.stateBuf[:0] enc.popState() @@ -548,32 +521,32 @@ func (enc *ReEncoder) stateNumberG(c rune) error { // got a leading "e" switch c { case '-', '+': enc.replaceState(enc.stateNumberH, true) - return enc.Out.WriteByte(byte(c)) + return writeByte(enc.Out, byte(c)) case '0': enc.replaceState(enc.stateNumberH, true) return nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberH, true) - enc.stateBuf[1] = 1 - return enc.Out.WriteByte(byte(c)) + enc.stateBuf[0] = 1 + return writeByte(enc.Out, byte(c)) default: enc.stateBuf = enc.stateBuf[:0] - return fmt.Errorf("unexpected character: %c", c) + return fmt.Errorf("decode number: unexpected character: %c", c) } } func (enc *ReEncoder) stateNumberH(c rune) error { // in the exponent's number part switch c { case '0': - if enc.stateBuf[1] == 0 { + if enc.stateBuf[0] == 0 { return nil } - return enc.Out.WriteByte('0') + return writeByte(enc.Out, '0') case '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.stateBuf[1] = 1 - return enc.Out.WriteByte(byte(c)) + enc.stateBuf[0] = 1 + return writeByte(enc.Out, byte(c)) default: - if enc.stateBuf[1] == 0 { - if err := enc.Out.WriteByte('0'); err != nil { + if enc.stateBuf[0] == 0 { + if err := writeByte(enc.Out, '0'); err != nil { return err } } @@ -589,12 +562,12 @@ func (enc *ReEncoder) stateInTrue(c rune) error { return enc._stateInLiteral(c, func (enc *ReEncoder) stateInFalse(c rune) error { return enc._stateInLiteral(c, "false") } func (enc *ReEncoder) stateInNull(c rune) error { return enc._stateInLiteral(c, 
"null") } func (enc *ReEncoder) _stateInLiteral(c rune, full string) error { - if c != rune(full[len(enc.stateBuf)+1]) { - return fmt.Errorf("unexpected character: %c", c) + if c != rune(full[len(enc.stateBuf)]) { + return fmt.Errorf("decode %s: unexpected character: %c", full, c) } enc.stateBuf = append(enc.stateBuf, byte(c)) if len(enc.stateBuf) == len(full) { enc.popState() } - return enc.Out.WriteRune(c) + return writeByte(enc.Out, byte(c)) } |