summaryrefslogtreecommitdiff
path: root/parse.go
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2022-08-13 18:20:30 -0600
committerLuke Shumaker <lukeshu@lukeshu.com>2022-08-14 12:05:42 -0600
commitb379bd7c2fba1e7d2c9429b3ffb93afdabd88cbd (patch)
treeab1a689bfcecb4a03f947b925dc505c97e0fb514 /parse.go
parentb7093b386f8009a1c4f35f08185826fa2545fdb4 (diff)
parse: Add a general-purpose parser based on the reencoder
Diffstat (limited to 'parse.go')
-rw-r--r--parse.go700
1 files changed, 281 insertions, 419 deletions
diff --git a/parse.go b/parse.go
index 50c8ba3..e09b85a 100644
--- a/parse.go
+++ b/parse.go
@@ -5,403 +5,287 @@
package lowmemjson
import (
- "errors"
"fmt"
"io"
- "unicode/utf8"
+ iofs "io/fs"
)
-type reencodeState func(rune) error
-
-type ReEncoder struct {
- Out io.Writer
-
- // Whether to minify the JSON.
- Compact bool
- // String to use to indent; ignored if Compact is true.
- Indent string
- // String to put before indents, for testing-compat with
- // encoding/json only.
- prefix string
- // Returns whether a given character in a string should be
- // "\uXXXX" escaped. The bool argument is whether it was
- // \u-escaped in the input. This does not affect characters
- // that must or must-not be \u-escaped to be valid JSON.
- //
- // If not set, then EscapeUnicodeDefault is used.
- UnicodeEscape func(rune, bool) bool
-
- bailAfterCurrent bool
-
- // state: .Write's utf8-decoding buffer
- buf [utf8.UTFMax]byte
- bufLen int
-
- // state: .WriteRune
- err error
- inputPos int64
- written int
- stack []reencodeState
- stack0IsNumber bool
- curIndent int
-
- // state: reencodeState-specific
- stateBuf []byte
-}
-
-// public API //////////////////////////////////////////////////////////////////
-
-func (enc *ReEncoder) Write(p []byte) (int, error) {
- if len(p) == 0 {
- return 0, nil
- }
- var n int
- if enc.bufLen > 0 {
- copy(enc.buf[enc.bufLen:], p)
- c, size := utf8.DecodeRune(enc.buf[:])
- n += size - enc.bufLen
- enc.bufLen = 0
- if _, err := enc.WriteRune(c); err != nil {
- return 0, err
- }
- }
- for utf8.FullRune(p[n:]) {
- c, size := utf8.DecodeRune(p[n:])
- if _, err := enc.WriteRune(c); err != nil {
- return n, err
- }
- n += size
- }
- enc.bufLen = copy(enc.buf[:], p[n:])
- return len(p), nil
-}
-
-func (enc *ReEncoder) Flush() error {
- if enc.bufLen > 0 {
- return &SyntaxError{fmt.Sprintf("EOF: unflushed unicode garbage: %q", enc.buf[:enc.bufLen]), enc.inputPos}
- }
- switch len(enc.stack) {
- case 0:
- return nil
- case 1:
- if enc.stack0IsNumber {
- enc.Compact = true
- return enc.state('\n')
- }
- fallthrough
- default:
- return &SyntaxError{fmt.Sprintf("EOF: in the middle of a value"), enc.inputPos}
- }
-}
// RuneType identifies the syntactic role that a single rune plays in a
// stream of JSON text.
type RuneType uint8

// The RuneType values.  RuneTypeError is deliberately the zero value.
const (
	RuneTypeError RuneType = iota

	RuneTypeSpace // insignificant whitespace between tokens

	RuneTypeObjectBeg   // '{'
	RuneTypeObjectColon // ':' between a key and its value
	RuneTypeObjectComma // ',' between members
	RuneTypeObjectEnd   // '}'

	RuneTypeArrayBeg   // '['
	RuneTypeArrayComma // ',' between items
	RuneTypeArrayEnd   // ']'

	RuneTypeStringBeg   // the opening '"'
	RuneTypeStringChar  // an ordinary character inside a string
	RuneTypeStringEsc   // the backslash that starts an escape
	RuneTypeStringEsc1  // the single character following a backslash
	RuneTypeStringEscU  // the 'u' of \uABCD
	RuneTypeStringEscUA // the 'A' of \uABCD (1st hex digit)
	RuneTypeStringEscUB // the 'B' of \uABCD (2nd hex digit)
	RuneTypeStringEscUC // the 'C' of \uABCD (3rd hex digit)
	RuneTypeStringEscUD // the 'D' of \uABCD (4th hex digit)
	RuneTypeStringEnd   // the closing '"'

	RuneTypeNumberInt  // integer part: optional '-' and the digits
	RuneTypeNumberFrac // fraction part: the '.' and the digits after it
	RuneTypeNumberExp  // exponent part: 'e'/'E', optional sign, digits

	RuneTypeTrueT // the letters of "true", one type per position
	RuneTypeTrueR
	RuneTypeTrueU
	RuneTypeTrueE

	RuneTypeFalseF // the letters of "false", one type per position
	RuneTypeFalseA
	RuneTypeFalseL
	RuneTypeFalseS
	RuneTypeFalseE

	RuneTypeNullN // the letters of "null", one type per position
	RuneTypeNullU
	RuneTypeNullL1
	RuneTypeNullL2
)
-func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
- if enc.err != nil {
- return 0, enc.err
- }
- if enc.bufLen != 0 {
- enc.err = errors.New("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()n")
- return 0, enc.err
- }
- enc.written = 0
- enc.err = enc.state(c)
- enc.inputPos += int64(utf8.RuneLen(c))
- return enc.written, enc.err
-}
+type parseState func(rune) (RuneType, error)
-// io helpers //////////////////////////////////////////////////////////////////
+type parser struct {
+ err error
+ closed bool
-func (enc *ReEncoder) emitByte(c byte) error {
- err := writeByte(enc.Out, c)
- if err == nil {
- enc.written++
- }
- return err
+ stack []parseState
+ stack0IsNumber bool // whether stack[0] is a number-state; affects how EOF is handled
}
-func (enc *ReEncoder) emit(n int, err error) error {
- enc.written += n
- return err
-}
+// "public" API ////////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) nlIndent() error {
- if enc.Compact || enc.Indent == "" {
- return nil
+func (par *parser) HandleRune(c rune) (RuneType, error) {
+ if par.closed {
+ return RuneTypeError, iofs.ErrClosed
}
- if err := enc.emitByte('\n'); err != nil {
- return err
+ if par.err != nil {
+ return RuneTypeError, par.err
}
- if enc.prefix != "" {
- if err := enc.emit(io.WriteString(enc.Out, enc.prefix)); err != nil {
- return err
- }
- }
- for i := 0; i < enc.curIndent; i++ {
- if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil {
- return err
+ return par.state(c)
+}
+
+func (par *parser) HandleEOF() error {
+ if par.closed {
+ return iofs.ErrClosed
+ }
+ if par.err == nil {
+ switch len(par.stack) {
+ case 0:
+ par.err = nil
+ case 1:
+ if par.stack0IsNumber {
+ _, par.err = par.state('\n')
+ }
+ fallthrough
+ default:
+ par.err = io.ErrUnexpectedEOF
}
}
- return nil
+ par.closed = true
+ return par.err
}
-// state helpers ///////////////////////////////////////////////////////////////
+// state helpers ///////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) pushState(state reencodeState, isNumber bool) {
- if len(enc.stack) == 0 {
- enc.stack0IsNumber = isNumber
+func (par *parser) pushState(state parseState, isNumber bool) {
+ if len(par.stack) == 0 {
+ par.stack0IsNumber = isNumber
}
- enc.stack = append(enc.stack, state)
+ par.stack = append(par.stack, state)
}
-func (enc *ReEncoder) replaceState(state reencodeState, isNumber bool) {
- if len(enc.stack) == 1 {
- enc.stack0IsNumber = isNumber
+func (par *parser) replaceState(state parseState, isNumber bool) {
+ if len(par.stack) == 1 {
+ par.stack0IsNumber = isNumber
}
- enc.stack[len(enc.stack)-1] = state
+ par.stack[len(par.stack)-1] = state
}
-func (enc *ReEncoder) popState() {
- if len(enc.stack) == 1 {
- enc.stack0IsNumber = false
+func (par *parser) popState() {
+ if len(par.stack) == 1 {
+ par.stack0IsNumber = false
}
- enc.stack = enc.stack[:len(enc.stack)-1]
+ par.stack = par.stack[:len(par.stack)-1]
}
-var errBailedAfterCurrent = errors.New("bailed after current")
-
-func (enc *ReEncoder) state(c rune) error {
- if len(enc.stack) == 0 {
- if enc.bailAfterCurrent {
- return errBailedAfterCurrent
- }
- enc.pushState(enc.stateAny, false)
+func (par *parser) state(c rune) (RuneType, error) {
+ if len(par.stack) == 0 {
+ par.pushState(par.stateAny, false)
}
- return enc.stack[len(enc.stack)-1](c)
+ return par.stack[len(par.stack)-1](c)
}
-// any /////////////////////////////////////////////////////////////////////////////////////////////
+// state: any //////////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateAny(c rune) error {
+func (par *parser) stateAny(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
+ return RuneTypeSpace, nil
case '{':
- enc.replaceState(enc.stateInEmptyObject, false)
- enc.curIndent++
+ par.replaceState(par.stateInObject, false)
+ return RuneTypeObjectBeg, nil
case '[':
- enc.replaceState(enc.stateInEmptyArray, false)
- enc.curIndent++
+ par.replaceState(par.stateInArray, false)
+ return RuneTypeArrayBeg, nil
case '"':
- enc.replaceState(enc.stateInString, false)
+ par.replaceState(par.stateInString, false)
+ return RuneTypeStringBeg, nil
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.replaceState(enc.stateNumberA, true)
- return enc.state(c)
+ par.replaceState(par.stateNumberA, true)
+ return par.state(c)
case 't':
- enc.replaceState(enc.stateInTrue, false)
- enc.stateBuf = append(enc.stateBuf[:0], 't')
+ par.replaceState(par.stateTrueT, false)
+ return RuneTypeTrueT, nil
case 'f':
- enc.replaceState(enc.stateInFalse, false)
- enc.stateBuf = append(enc.stateBuf[:0], 'f')
+ par.replaceState(par.stateFalseF, false)
+ return RuneTypeFalseF, nil
case 'n':
- enc.replaceState(enc.stateInNull, false)
- enc.stateBuf = append(enc.stateBuf[:0], 'n')
+ par.replaceState(par.stateNullN, false)
+ return RuneTypeNullN, nil
default:
- return &SyntaxError{fmt.Sprintf("any: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-// object //////////////////////////////////////////////////////////////////////////////////////////
+// state: object ///////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateInEmptyObject(c rune) error { return enc._stateInObject(c, false) }
-func (enc *ReEncoder) stateInNonEmptyObject(c rune) error { return enc._stateInObject(c, true) }
-func (enc *ReEncoder) _stateInObject(c rune, nonempty bool) error {
+func (par *parser) stateInObject(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
+ return RuneTypeSpace, nil
case '"':
- if err := enc.nlIndent(); err != nil {
- return err
- }
- enc.replaceState(enc.stateInKV, false)
- enc.pushState(enc.stateInString, false)
+ par.replaceState(par.stateAfterK, false)
+ par.pushState(par.stateInString, false)
+ return RuneTypeStringBeg, nil
case '}':
- enc.popState()
- enc.curIndent--
- if nonempty {
- if err := enc.nlIndent(); err != nil {
- return err
- }
- }
+ par.popState()
+ return RuneTypeObjectEnd, nil
default:
- return &SyntaxError{fmt.Sprintf("object: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-func (enc *ReEncoder) stateInKV(c rune) error {
+func (par *parser) stateAfterK(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
- return enc.emitByte(byte(c))
+ return RuneTypeSpace, nil
case ':':
- enc.replaceState(enc.stateAfterV, false)
- enc.pushState(enc.stateAny, false)
- if err := enc.emitByte(byte(c)); err != nil {
- return err
- }
- if !enc.Compact && enc.Indent != "" {
- return enc.emitByte(' ')
- }
- return nil
+ par.replaceState(par.stateAfterV, false)
+ par.pushState(par.stateAny, false)
+ return RuneTypeObjectColon, nil
default:
- return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
}
}
-func (enc *ReEncoder) stateAfterV(c rune) error {
+func (par *parser) stateAfterV(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
+ return RuneTypeSpace, nil
case ',':
- enc.replaceState(enc.stateInNonEmptyObject, false)
+ par.replaceState(par.stateInObject, false)
+ return RuneTypeObjectComma, nil
case '}':
- enc.popState()
- enc.curIndent--
- if err := enc.nlIndent(); err != nil {
- return err
- }
+ par.popState()
+ return RuneTypeObjectEnd, nil
default:
- return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-// array ///////////////////////////////////////////////////////////////////////////////////////////
+// state: array ////////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateInEmptyArray(c rune) error { return enc._stateInArray(c, false) }
-func (enc *ReEncoder) stateInNonEmptyArray(c rune) error { return enc._stateInArray(c, true) }
-func (enc *ReEncoder) _stateInArray(c rune, nonempty bool) error {
+func (par *parser) stateInArray(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
+ return RuneTypeSpace, nil
case ']':
- enc.popState()
- enc.curIndent--
- if nonempty {
- if err := enc.nlIndent(); err != nil {
- return err
- }
- }
+ par.popState()
+ return RuneTypeArrayEnd, nil
default:
- if err := enc.nlIndent(); err != nil {
- return err
- }
- enc.replaceState(enc.stateAfterItem, false)
- enc.pushState(enc.stateAny, false)
- return enc.state(c)
+ par.replaceState(par.stateAfterItem, false)
+ par.pushState(par.stateAny, false)
+ return par.state(c)
}
- return enc.emitByte(byte(c))
}
-func (enc *ReEncoder) stateAfterItem(c rune) error {
+func (par *parser) stateAfterItem(c rune) (RuneType, error) {
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
- if enc.Compact || enc.Indent != "" {
- return nil
- }
+ return RuneTypeSpace, nil
case ',':
- enc.replaceState(enc.stateInNonEmptyArray, false)
+ par.replaceState(par.stateInArray, false)
+ return RuneTypeArrayComma, nil
case ']':
- enc.popState()
- enc.curIndent--
- if err := enc.nlIndent(); err != nil {
- return err
- }
+ par.popState()
+ return RuneTypeArrayEnd, nil
default:
- return &SyntaxError{fmt.Sprintf("array: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-// string //////////////////////////////////////////////////////////////////////////////////////////
+// state: string ///////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateInString(c rune) error {
+func (par *parser) stateInString(c rune) (RuneType, error) {
switch {
case c == '\\':
- enc.replaceState(enc.stateInBackslash, false)
- return nil
+ par.replaceState(par.stateInEsc, false)
+ return RuneTypeStringEsc, nil
case c == '"':
- enc.popState()
- return enc.emitByte(byte(c))
+ par.popState()
+ return RuneTypeStringEnd, nil
case 0x0020 <= c && c <= 0x10FFFF:
- return enc.emit(writeStringChar(enc.Out, c, false, enc.UnicodeEscape))
+ return RuneTypeStringChar, nil
default:
- return &SyntaxError{fmt.Sprintf("string: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c)
}
}
-func (enc *ReEncoder) stateInBackslash(c rune) error {
+func (par *parser) stateInEsc(c rune) (RuneType, error) {
switch c {
- case '"':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '"', false, enc.UnicodeEscape))
- case '\\':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\\', false, enc.UnicodeEscape))
- case '/':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '/', false, enc.UnicodeEscape))
- case 'b':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\b', false, enc.UnicodeEscape))
- case 'f':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\f', false, enc.UnicodeEscape))
- case 'n':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\n', false, enc.UnicodeEscape))
- case 'r':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\r', false, enc.UnicodeEscape))
- case 't':
- enc.replaceState(enc.stateInString, false)
- return enc.emit(writeStringChar(enc.Out, '\t', false, enc.UnicodeEscape))
+ case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+ par.replaceState(par.stateInString, false)
+ return RuneTypeStringEsc1, nil
case 'u':
- enc.replaceState(enc.stateInUnicode, false)
- return nil
+ par.replaceState(par.stateInEscU, false)
+ return RuneTypeStringEscU, nil
default:
- return &SyntaxError{fmt.Sprintf("string backslash sequence: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
}
}
-func (enc *ReEncoder) stateInUnicode(c rune) error {
+func (par *parser) _stateInEscU(c rune, typ RuneType, nxt parseState) (RuneType, error) {
switch {
- case '0' <= c && c <= '9':
- enc.stateBuf = append(enc.stateBuf, byte(c)-'0')
- case 'a' <= c && c <= 'f':
- enc.stateBuf = append(enc.stateBuf, byte(c)-'a'+10)
- case 'A' <= c && c <= 'F':
- enc.stateBuf = append(enc.stateBuf, byte(c)-'A'+10)
+ case ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'):
+ par.replaceState(nxt, false)
+ return typ, nil
default:
- return &SyntaxError{fmt.Sprintf("string unicode sequence: unexpected character: %c", c), enc.inputPos}
- }
- if len(enc.stateBuf) == 4 {
- enc.replaceState(enc.stateInString, false)
- c := 0 |
- rune(enc.stateBuf[0])<<12 |
- rune(enc.stateBuf[1])<<8 |
- rune(enc.stateBuf[2])<<4 |
- rune(enc.stateBuf[3])<<0
- enc.stateBuf = enc.stateBuf[:0]
- return enc.emit(writeStringChar(enc.Out, c, true, enc.UnicodeEscape))
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
- return nil
+}
+func (par *parser) stateInEscU(c rune) (RuneType, error) {
+ return par._stateInEscU(c, RuneTypeStringEscUA, par.stateInEscUA)
+}
+func (par *parser) stateInEscUA(c rune) (RuneType, error) {
+ return par._stateInEscU(c, RuneTypeStringEscUB, par.stateInEscUB)
+}
+func (par *parser) stateInEscUB(c rune) (RuneType, error) {
+ return par._stateInEscU(c, RuneTypeStringEscUC, par.stateInEscUC)
+}
+func (par *parser) stateInEscUC(c rune) (RuneType, error) {
+ return par._stateInEscU(c, RuneTypeStringEscUD, par.stateInString)
}
-// number //////////////////////////////////////////////////////////////////////////////////////////
+// state: number ///////////////////////////////////////////////////////////////////////////////////
// Here's a flattened drawing of the syntax diagram from www.json.org :
//
@@ -415,7 +299,7 @@ func (enc *ReEncoder) stateInUnicode(c rune) error {
// ╰─"+"─╯
//
// Now here it is slightly redrawn, and with each distinct state our
-// decoder can be in marked with a single-capital-letter:
+// parser can be in marked with a single-capital-letter:
//
// [-------------- integer ------------][--------- fraction --------][--------- exponent ---------]
// >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─>
@@ -427,172 +311,150 @@ func (enc *ReEncoder) stateInUnicode(c rune) error {
// ╰─"+"─╯
//
// Which state we're at is the 'X' in 'stateNumberX'.
-//
-// Besides just traversing that, there are a few compressions we want to make:
-//
-// - trim trailing 0s from fraction the (but don't remove the
-// fraction if it's all 0s); do this by making the F state a little
-// special. This requires a little more state, because when we
-// encounter the 0 we don't yet know if it's trailing. So, store
-// the number of maybe-trailing zeros in enc.stateBuf[0]; if that
-// reaches 255, then bleed over to enc.stateBuf[1] and so on.
-//
-// - trim leading 0s from the exponent (but don't remove the exponent
-// if it's all 0s); do this by making the H state a little special.
-// Record whether we've seen a non-zero digit in enc.stateBuf[0]
-// (0=false, 1=true).
-// integer-part ////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateNumberA(c rune) error { // start
+// number: integer-part ////////////////////////////////////////////////////////
+func (par *parser) stateNumberA(c rune) (RuneType, error) { // start
switch c {
case '-':
- enc.replaceState(enc.stateNumberB, true)
+ par.replaceState(par.stateNumberB, true)
+ return RuneTypeNumberInt, nil
case '0':
- enc.replaceState(enc.stateNumberC, true)
+ par.replaceState(par.stateNumberC, true)
+ return RuneTypeNumberInt, nil
case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.replaceState(enc.stateNumberD, true)
+ par.replaceState(par.stateNumberD, true)
+ return RuneTypeNumberInt, nil
default:
- return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-func (enc *ReEncoder) stateNumberB(c rune) error { // got a leading "-"
+func (par *parser) stateNumberB(c rune) (RuneType, error) { // got a leading "-"
switch c {
case '0':
- enc.replaceState(enc.stateNumberC, true)
+ par.replaceState(par.stateNumberC, true)
+ return RuneTypeNumberInt, nil
case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.replaceState(enc.stateNumberD, true)
+ par.replaceState(par.stateNumberD, true)
+ return RuneTypeNumberInt, nil
default:
- return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
}
- return enc.emitByte(byte(c))
}
-func (enc *ReEncoder) stateNumberC(c rune) error { // ready for the fraction or exponent part to start
+func (par *parser) stateNumberC(c rune) (RuneType, error) { // ready for the fraction or exponent part to start
switch c {
case '.':
- enc.replaceState(enc.stateNumberE, true)
- return enc.emitByte('.')
+ par.replaceState(par.stateNumberE, true)
+ return RuneTypeNumberFrac, nil
case 'e', 'E':
- enc.replaceState(enc.stateNumberG, true)
- enc.stateBuf = append(enc.stateBuf[:0], 0)
- return enc.emitByte('e')
+ par.replaceState(par.stateNumberG, true)
+ return RuneTypeNumberExp, nil
default:
- enc.popState()
- return enc.state(c)
+ par.popState()
+ return par.state(c)
}
}
-func (enc *ReEncoder) stateNumberD(c rune) error { // in the integer part
+func (par *parser) stateNumberD(c rune) (RuneType, error) { // in the integer part
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return enc.emitByte(byte(c))
+ return RuneTypeNumberInt, nil
case '.':
- enc.replaceState(enc.stateNumberE, true)
- return enc.emitByte('.')
+ par.replaceState(par.stateNumberE, true)
+ return RuneTypeNumberFrac, nil
case 'e', 'E':
- enc.replaceState(enc.stateNumberG, true)
- enc.stateBuf = append(enc.stateBuf[:0], 0)
- return enc.emitByte('e')
+ par.replaceState(par.stateNumberG, true)
+ return RuneTypeNumberExp, nil
default:
- enc.popState()
- return enc.state(c)
+ par.popState()
+ return par.state(c)
}
}
-// fraction-part ///////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateNumberE(c rune) error { // got a ".", ready to read a number for the fraction part
+// number: fraction-part ///////////////////////////////////////////////////////
+func (par *parser) stateNumberE(c rune) (RuneType, error) { // got a ".", ready to read a number for the fraction part
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.replaceState(enc.stateNumberF, true)
- return enc.emitByte(byte(c))
+ par.replaceState(par.stateNumberF, true)
+ return RuneTypeNumberFrac, nil
default:
- return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
}
}
-func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part
+func (par *parser) stateNumberF(c rune) (RuneType, error) { // in the fraction part
switch c {
- case '0':
- if len(enc.stateBuf) > 0 && enc.stateBuf[len(enc.stateBuf)-1] < 255 {
- enc.stateBuf[len(enc.stateBuf)-1]++
- } else {
- enc.stateBuf = append(enc.stateBuf, 1)
- }
- return nil
- case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- for len(enc.stateBuf) > 0 {
- if err := enc.emitByte('0'); err != nil {
- return err
- }
- if enc.stateBuf[len(enc.stateBuf)-1] == 1 {
- enc.stateBuf = enc.stateBuf[:len(enc.stateBuf)-1]
- } else {
- enc.stateBuf[len(enc.stateBuf)-1]--
- }
- }
- return enc.emitByte(byte(c))
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return RuneTypeNumberFrac, nil
case 'e', 'E':
- enc.replaceState(enc.stateNumberG, true)
- enc.stateBuf = append(enc.stateBuf[:0], 0)
- return enc.emitByte('e')
+ par.replaceState(par.stateNumberG, true)
+ return RuneTypeNumberExp, nil
default:
- enc.stateBuf = enc.stateBuf[:0]
- enc.popState()
- return enc.state(c)
+ par.popState()
+ return par.state(c)
}
}
-// exponent-part ///////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateNumberG(c rune) error { // got a leading "e"
+// number: exponent-part ///////////////////////////////////////////////////////
+func (par *parser) stateNumberG(c rune) (RuneType, error) { // got a leading "e"
switch c {
- case '-', '+':
- enc.replaceState(enc.stateNumberH, true)
- return enc.emitByte(byte(c))
- case '0':
- enc.replaceState(enc.stateNumberH, true)
- return nil
- case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.replaceState(enc.stateNumberH, true)
- enc.stateBuf[0] = 1
- return enc.emitByte(byte(c))
+ case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ par.replaceState(par.stateNumberH, true)
+ return RuneTypeNumberExp, nil
default:
- enc.stateBuf = enc.stateBuf[:0]
- return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos}
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c)
}
}
-func (enc *ReEncoder) stateNumberH(c rune) error { // in the exponent's number part
+func (par *parser) stateNumberH(c rune) (RuneType, error) { // in the exponent's number part
switch c {
- case '0':
- if enc.stateBuf[0] == 0 {
- return nil
- }
- return enc.emitByte('0')
- case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- enc.stateBuf[0] = 1
- return enc.emitByte(byte(c))
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return RuneTypeNumberExp, nil
default:
- if enc.stateBuf[0] == 0 {
- if err := enc.emitByte('0'); err != nil {
- return err
- }
- }
- enc.stateBuf = enc.stateBuf[:0]
- enc.popState()
- return enc.state(c)
+ par.popState()
+ return par.state(c)
}
}
-// literals ////////////////////////////////////////////////////////////////////////////////////////
+// state: literals /////////////////////////////////////////////////////////////////////////////////
-func (enc *ReEncoder) stateInTrue(c rune) error { return enc._stateInLiteral(c, "true") }
-func (enc *ReEncoder) stateInFalse(c rune) error { return enc._stateInLiteral(c, "false") }
-func (enc *ReEncoder) stateInNull(c rune) error { return enc._stateInLiteral(c, "null") }
-func (enc *ReEncoder) _stateInLiteral(c rune, full string) error {
- if c != rune(full[len(enc.stateBuf)]) {
- return &SyntaxError{fmt.Sprintf("%s: unexpected character: %c", full, c), enc.inputPos}
+func (par *parser) l(c rune, full string, exp rune, typ RuneType, nxt parseState) (RuneType, error) {
+ if c != exp {
+ return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", full, c)
}
- enc.stateBuf = append(enc.stateBuf, byte(c))
- if len(enc.stateBuf) == len(full) {
- enc.stateBuf = enc.stateBuf[:0]
- enc.popState()
+ if nxt == nil {
+ par.popState()
+ } else {
+ par.replaceState(nxt, false)
}
- return enc.emitByte(byte(c))
+ return typ, nil
+}
+
+func (par *parser) stateTrueT(c rune) (RuneType, error) {
+ return par.l(c, "true", 'r', RuneTypeTrueR, par.stateTrueR)
+}
+func (par *parser) stateTrueR(c rune) (RuneType, error) {
+ return par.l(c, "true", 'u', RuneTypeTrueU, par.stateTrueU)
+}
+func (par *parser) stateTrueU(c rune) (RuneType, error) {
+ return par.l(c, "true", 'e', RuneTypeTrueR, nil)
+}
+
+func (par *parser) stateFalseF(c rune) (RuneType, error) {
+ return par.l(c, "false", 'a', RuneTypeFalseA, par.stateFalseA)
+}
+func (par *parser) stateFalseA(c rune) (RuneType, error) {
+ return par.l(c, "false", 'l', RuneTypeFalseL, par.stateFalseL)
+}
+func (par *parser) stateFalseL(c rune) (RuneType, error) {
+ return par.l(c, "false", 's', RuneTypeFalseS, par.stateFalseS)
+}
+func (par *parser) stateFalseS(c rune) (RuneType, error) {
+ return par.l(c, "false", 'e', RuneTypeFalseE, nil)
+}
+
+func (par *parser) stateNullN(c rune) (RuneType, error) {
+ return par.l(c, "null", 'u', RuneTypeNullU, par.stateNullU)
+}
+func (par *parser) stateNullU(c rune) (RuneType, error) {
+ return par.l(c, "null", 'l', RuneTypeNullL1, par.stateNullL)
+}
+func (par *parser) stateNullL(c rune) (RuneType, error) {
+ return par.l(c, "null", 'l', RuneTypeNullL2, nil)
}