summaryrefslogtreecommitdiff
path: root/parse.go
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2022-08-13 22:05:20 -0600
committer Luke Shumaker <lukeshu@lukeshu.com> 2022-08-14 13:44:39 -0600
commit 4b00a61c33d6a448c59c5509c0a408f527308c8b (patch)
tree fb38f3b53a01438054798c76c6fd471d39637d42 /parse.go
parent 567beedfc8f6418e27259fa774e6c7ba1a685c22 (diff)
parse: Rework to avoid passing around function pointers
"Ignore whitespace" is probably essential for viewing this patch.
Diffstat (limited to 'parse.go')
-rw-r--r-- parse.go 776
1 files changed, 403 insertions, 373 deletions
diff --git a/parse.go b/parse.go
index a1c5472..d4b55eb 100644
--- a/parse.go
+++ b/parse.go
@@ -8,6 +8,7 @@ import (
"fmt"
"io"
iofs "io/fs"
+ "strings"
)
type RuneType uint8
@@ -37,9 +38,14 @@ const (
RuneTypeStringEscUD // \uABCD : D
RuneTypeStringEnd // closing '"'
- RuneTypeNumberInt // 0|[1-9][0-9]*
- RuneTypeNumberFrac // \.[0-9]*
- RuneTypeNumberExp // [eE][-+]?[0-9]
+ RuneTypeNumberIntNeg
+ RuneTypeNumberIntZero
+ RuneTypeNumberIntDig
+ RuneTypeNumberFracDot
+ RuneTypeNumberFracDig
+ RuneTypeNumberExpE
+ RuneTypeNumberExpSign
+ RuneTypeNumberExpDig
RuneTypeTrueT
RuneTypeTrueR
@@ -58,29 +64,118 @@ const (
RuneTypeNullL2
)
+func (t RuneType) String() string {
+ str, ok := map[RuneType]string{
+ RuneTypeError: "x",
+
+ RuneTypeSpace: " ",
+
+ RuneTypeObjectBeg: "{",
+ RuneTypeObjectColon: ":",
+ RuneTypeObjectComma: "o",
+ RuneTypeObjectEnd: "}",
+
+ RuneTypeArrayBeg: "[",
+ RuneTypeArrayComma: "a",
+ RuneTypeArrayEnd: "]",
+
+ RuneTypeStringBeg: "“",
+ RuneTypeStringChar: "c",
+ RuneTypeStringEsc: "\\",
+ RuneTypeStringEsc1: "b",
+ RuneTypeStringEscU: "u",
+ RuneTypeStringEscUA: "A",
+ RuneTypeStringEscUB: "B",
+ RuneTypeStringEscUC: "C",
+ RuneTypeStringEscUD: "D",
+ RuneTypeStringEnd: "”",
+
+ RuneTypeNumberIntNeg: "-",
+ RuneTypeNumberIntZero: "0",
+ RuneTypeNumberIntDig: "1",
+ RuneTypeNumberFracDot: ".",
+ RuneTypeNumberFracDig: "2",
+ RuneTypeNumberExpE: "e",
+ RuneTypeNumberExpSign: "+",
+ RuneTypeNumberExpDig: "3",
+
+ RuneTypeTrueT: "𝕥", // double-struck
+ RuneTypeTrueR: "𝕣",
+ RuneTypeTrueU: "𝕦",
+ RuneTypeTrueE: "𝕖",
+
+ RuneTypeFalseF: "𝔣", // fraktur
+ RuneTypeFalseA: "𝔞",
+ RuneTypeFalseL: "𝔩",
+ RuneTypeFalseS: "𝔰",
+ RuneTypeFalseE: "𝔢",
+
+ RuneTypeNullN: "ⓝ", // circled
+ RuneTypeNullU: "ⓤ",
+ RuneTypeNullL1: "ⓛ",
+ RuneTypeNullL2: "Ⓛ", // +uppercase
+ }[t]
+ if ok {
+ return str
+ }
+ return fmt.Sprintf("<%d>", t)
+}
+
+func (t RuneType) IsNumber() bool {
+ return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig
+}
+
+// { waiting for key to start or '}'
+// โ€ reading key / waiting for colon
+// : waiting for value to start
+// o reading value / waiting for ',' or '}'
+//
+// {"x":"y","a":"b"}
+//
+// { {
+// โ€โ€œ {"
+// โ€โ€œ {"x
+// โ€ {"x"
+// : {"x":
+// oโ€œ {"x":"
+// oโ€œ {"x":"y
+// o {"x":"y"
+// { {"x":"y",
+// โ€โ€œ {"x":"y","
+// โ€โ€œ {"x":"y","a
+// โ€ {"x":"y","a"
+// : {"x":"y","a":
+// o“ {"x":"y","a":"
+// o“ {"x":"y","a":"b
+// o {"x":"y","a":"b"
+//
+// [ waiting for item to start or ']'
+// a reading item / waiting for ',' or ']'
+
type parseState func(rune) (RuneType, error)
-type parser struct {
+type Parser struct {
err error
closed bool
- stack []parseState
- stack0IsNumber bool // whether stack[0] is a number-state; affects how EOF is handled
+ bailAfterCurrent bool // bad hack
+
+ stack []RuneType
}
-// "public" API ////////////////////////////////////////////////////////////////////////////////////
+// public API //////////////////////////////////////////////////////////////////////////////////////
-func (par *parser) HandleRune(c rune) (RuneType, error) {
+func (par *Parser) HandleRune(c rune) (typ RuneType, err error) {
if par.closed {
return RuneTypeError, iofs.ErrClosed
}
if par.err != nil {
return RuneTypeError, par.err
}
- return par.state(c)
+ return par.handleRune(c)
}
-func (par *parser) HandleEOF() error {
+func (par *Parser) HandleEOF() error {
if par.closed {
return iofs.ErrClosed
}
@@ -89,8 +184,9 @@ func (par *parser) HandleEOF() error {
case 0:
par.err = nil
case 1:
- if par.stack0IsNumber {
- _, par.err = par.state('\n')
+ if par.stack[0].IsNumber() {
+ _, par.err = par.handleRune('\n')
+ break
}
fallthrough
default:
@@ -101,386 +197,320 @@ func (par *parser) HandleEOF() error {
return par.err
}
-// state helpers ///////////////////////////////////////////////////////////////////////////////////
+// internal ////////////////////////////////////////////////////////////////////////////////////////
-func (par *parser) pushState(state parseState, isNumber bool) {
- if len(par.stack) == 0 {
- par.stack0IsNumber = isNumber
- }
+func (par *Parser) pushState(state RuneType) RuneType {
par.stack = append(par.stack, state)
+ return state
}
-func (par *parser) replaceState(state parseState, isNumber bool) {
- if len(par.stack) == 1 {
- par.stack0IsNumber = isNumber
- }
+func (par *Parser) replaceState(state RuneType) RuneType {
par.stack[len(par.stack)-1] = state
+ return state
}
-func (par *parser) popState() {
- if len(par.stack) == 1 {
- par.stack0IsNumber = false
- }
+func (par *Parser) popState() {
par.stack = par.stack[:len(par.stack)-1]
}
-func (par *parser) state(c rune) (RuneType, error) {
- if len(par.stack) == 0 {
- par.pushState(par.stateAny, false)
- }
- return par.stack[len(par.stack)-1](c)
-}
-
-// state: any //////////////////////////////////////////////////////////////////////////////////////
-
-func (par *parser) stateAny(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case '{':
- par.replaceState(par.stateInObject, false)
- return RuneTypeObjectBeg, nil
- case '[':
- par.replaceState(par.stateInArray, false)
- return RuneTypeArrayBeg, nil
- case '"':
- par.replaceState(par.stateInString, false)
- return RuneTypeStringBeg, nil
- case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberA, true)
- return par.state(c)
- case 't':
- par.replaceState(par.stateTrueT, false)
- return RuneTypeTrueT, nil
- case 'f':
- par.replaceState(par.stateFalseF, false)
- return RuneTypeFalseF, nil
- case 'n':
- par.replaceState(par.stateNullN, false)
- return RuneTypeNullN, nil
- default:
- return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c)
- }
-}
-
-// state: object ///////////////////////////////////////////////////////////////////////////////////
-
-func (par *parser) stateInObject(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case '"':
- par.replaceState(par.stateAfterK, false)
- par.pushState(par.stateInString, false)
- return RuneTypeStringBeg, nil
- case '}':
- par.popState()
- return RuneTypeObjectEnd, nil
- default:
- return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c)
- }
-}
-func (par *parser) stateAfterK(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case ':':
- par.replaceState(par.stateAfterV, false)
- par.pushState(par.stateAny, false)
- return RuneTypeObjectColon, nil
- default:
- return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
- }
-}
-func (par *parser) stateAfterV(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case ',':
- par.replaceState(par.stateInObject, false)
- return RuneTypeObjectComma, nil
- case '}':
- par.popState()
- return RuneTypeObjectEnd, nil
- default:
- return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
- }
-}
-
-// state: array ////////////////////////////////////////////////////////////////////////////////////
-
-func (par *parser) stateInArray(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case ']':
- par.popState()
- return RuneTypeArrayEnd, nil
- default:
- par.replaceState(par.stateAfterItem, false)
- par.pushState(par.stateAny, false)
- return par.state(c)
- }
-}
-func (par *parser) stateAfterItem(c rune) (RuneType, error) {
- switch c {
- case 0x0020, 0x000A, 0x000D, 0x0009:
- return RuneTypeSpace, nil
- case ',':
- par.replaceState(par.stateInArray, false)
- return RuneTypeArrayComma, nil
- case ']':
- par.popState()
- return RuneTypeArrayEnd, nil
- default:
- return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c)
- }
-}
-
-// state: string ///////////////////////////////////////////////////////////////////////////////////
-
-func (par *parser) stateInString(c rune) (RuneType, error) {
- switch {
- case c == '\\':
- par.replaceState(par.stateInEsc, false)
- return RuneTypeStringEsc, nil
- case c == '"':
- par.popState()
- return RuneTypeStringEnd, nil
- case 0x0020 <= c && c <= 0x10FFFF:
- return RuneTypeStringChar, nil
- default:
- return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c)
- }
-}
-func (par *parser) stateInEsc(c rune) (RuneType, error) {
- switch c {
- case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
- par.replaceState(par.stateInString, false)
- return RuneTypeStringEsc1, nil
- case 'u':
- par.replaceState(par.stateInEscU, false)
- return RuneTypeStringEscU, nil
- default:
- return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
- }
-}
-func (par *parser) _stateInEscU(c rune, typ RuneType, nxt parseState) (RuneType, error) {
- switch {
- case ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'):
- par.replaceState(nxt, false)
- return typ, nil
- default:
- return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
- }
-}
-func (par *parser) stateInEscU(c rune) (RuneType, error) {
- return par._stateInEscU(c, RuneTypeStringEscUA, par.stateInEscUA)
-}
-func (par *parser) stateInEscUA(c rune) (RuneType, error) {
- return par._stateInEscU(c, RuneTypeStringEscUB, par.stateInEscUB)
-}
-func (par *parser) stateInEscUB(c rune) (RuneType, error) {
- return par._stateInEscU(c, RuneTypeStringEscUC, par.stateInEscUC)
-}
-func (par *parser) stateInEscUC(c rune) (RuneType, error) {
- return par._stateInEscU(c, RuneTypeStringEscUD, par.stateInString)
-}
-
-// state: number ///////////////////////////////////////////////////////////////////////////////////
-
-// Here's a flattened drawing of the syntax diagram from www.json.org :
-//
-//  [------------ integer ----------][-- fraction ---][-------- exponent -------]
-// >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─>
-//   │     │ │           │         │  │             │  │                       │
-//   ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯  ╰─"."─╭digit╮─╯  ╰─"e"─╭─╮─────╭─╭digit╮─╯
-//                         ╰──<──╯          ╰──<──╯    │     │ │     │ ╰──<──╯
-//                                                     ╰─"E"─╯ ╰─"-"─╯
-//                                                             │     │
-//                                                             ╰─"+"─╯
-//
-// Now here it is slightly redrawn, and with each distinct state our
-// parser can be in marked with a single-capital-letter:
-//
-//   [-------------- integer ------------][--------- fraction --------][--------- exponent ---------]
-// >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─>
-//     │       │  │               │         │                  │         │                          │
-//     ╰─"-"─B─╯  ╰─digit 1-9─╭─D─╯─digit╮  ╰─"."─E─digit──╭─F─╯─digit╮  ╰─"e"─╭─G─╮─────╭─╭digit─I─╯
-//                            ╰────<─────╯                 ╰────<─────╯  │     │   │     H ╰────<───╯
-//                                                                       ╰─"E"─╯   ╰─"-"─╯
-//                                                                                 │     │
-//                                                                                 ╰─"+"─╯
-//
-// Which state we're at is the 'X' in 'stateNumberX'.
-//
-// It may be worth noting that these states, if we're going to try to
-// assign meaningful names, are perhaps best named by the type of the
-// preceding character:
-//
-// A = (nothing yet)
-// B = IntNeg
-// C = IntZero
-// D = IntDig
-// E = FracDot
-// F = FracDig
-// G = ExpE
-// H = ExpSign
-// I = ExpDig
-
-// number: integer-part ////////////////////////////////////////////////////////
-func (par *parser) stateNumberA(c rune) (RuneType, error) { // start
- switch c {
- case '-':
- par.replaceState(par.stateNumberB, true)
- return RuneTypeNumberInt, nil
- case '0':
- par.replaceState(par.stateNumberC, true)
- return RuneTypeNumberInt, nil
- case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberD, true)
- return RuneTypeNumberInt, nil
- default:
- return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
- }
-}
-func (par *parser) stateNumberB(c rune) (RuneType, error) { // got a leading "-"
- switch c {
- case '0':
- par.replaceState(par.stateNumberC, true)
- return RuneTypeNumberInt, nil
- case '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberD, true)
- return RuneTypeNumberInt, nil
- default:
- return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
- }
-}
-func (par *parser) stateNumberC(c rune) (RuneType, error) { // ready for the fraction or exponent part to start
- switch c {
- case '.':
- par.replaceState(par.stateNumberE, true)
- return RuneTypeNumberFrac, nil
- case 'e', 'E':
- par.replaceState(par.stateNumberG, true)
- return RuneTypeNumberExp, nil
- default:
- par.popState()
- return par.state(c)
- }
-}
-func (par *parser) stateNumberD(c rune) (RuneType, error) { // in the integer part
- switch c {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return RuneTypeNumberInt, nil
- case '.':
- par.replaceState(par.stateNumberE, true)
- return RuneTypeNumberFrac, nil
- case 'e', 'E':
- par.replaceState(par.stateNumberG, true)
- return RuneTypeNumberExp, nil
- default:
- par.popState()
- return par.state(c)
- }
-}
-
-// number: fraction-part ///////////////////////////////////////////////////////
-func (par *parser) stateNumberE(c rune) (RuneType, error) { // got a ".", ready to read a number for the fraction part
- switch c {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberF, true)
- return RuneTypeNumberFrac, nil
- default:
- return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
- }
-}
-func (par *parser) stateNumberF(c rune) (RuneType, error) { // in the fraction part
- switch c {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return RuneTypeNumberFrac, nil
- case 'e', 'E':
- par.replaceState(par.stateNumberG, true)
- return RuneTypeNumberExp, nil
- default:
- par.popState()
- return par.state(c)
+func (par *Parser) stackString() string {
+ var buf strings.Builder
+ for _, s := range par.stack {
+ buf.WriteString(s.String())
}
+ return buf.String()
}
-// number: exponent-part ///////////////////////////////////////////////////////
-func (par *parser) stateNumberG(c rune) (RuneType, error) { // got a leading "e"
- switch c {
- case '-', '+':
- par.replaceState(par.stateNumberH, true)
- return RuneTypeNumberExp, nil
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberI, true)
- return RuneTypeNumberExp, nil
- default:
- return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c)
- }
-}
-func (par *parser) stateNumberH(c rune) (RuneType, error) { // got a + or - sign
- switch c {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- par.replaceState(par.stateNumberI, true)
- return RuneTypeNumberExp, nil
- default:
- return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c)
+func (par *Parser) handleRune(c rune) (RuneType, error) {
+ if len(par.stack) == 0 {
+ par.pushState(RuneTypeError)
}
-}
-func (par *parser) stateNumberI(c rune) (RuneType, error) { // in the exponent's number part
- switch c {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return RuneTypeNumberExp, nil
+ switch par.stack[len(par.stack)-1] {
+ // any /////////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeError:
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case '{':
+ return par.replaceState(RuneTypeObjectBeg), nil
+ case '[':
+ return par.replaceState(RuneTypeArrayBeg), nil
+ case '"':
+ return par.replaceState(RuneTypeStringBeg), nil
+ case '-':
+ return par.replaceState(RuneTypeNumberIntNeg), nil
+ case '0':
+ return par.replaceState(RuneTypeNumberIntZero), nil
+ case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ case 't':
+ return par.replaceState(RuneTypeTrueT), nil
+ case 'f':
+ return par.replaceState(RuneTypeFalseF), nil
+ case 'n':
+ return par.replaceState(RuneTypeNullN), nil
+ default:
+ return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c)
+ }
+ // object //////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeObjectBeg: // waiting for key to start or '}'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case '"':
+ par.replaceState(RuneTypeStringEnd)
+ return par.pushState(RuneTypeStringBeg), nil
+ case '}':
+ par.popState()
+ return RuneTypeObjectEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c)
+ }
+ case RuneTypeStringEnd: // waiting for ':'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ':':
+ par.replaceState(RuneTypeObjectComma)
+ par.pushState(RuneTypeError)
+ return RuneTypeObjectColon, nil
+ default:
+ return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
+ }
+ case RuneTypeObjectComma: // waiting for ',' or '}'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ',':
+ par.replaceState(RuneTypeObjectBeg)
+ return RuneTypeObjectComma, nil
+ case '}':
+ par.popState()
+ return RuneTypeObjectEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c)
+ }
+ // array ///////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeArrayBeg: // waiting for item to start or ']'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ']':
+ par.popState()
+ return RuneTypeArrayEnd, nil
+ default:
+ par.replaceState(RuneTypeArrayComma)
+ par.pushState(RuneTypeError)
+ return par.handleRune(c)
+ }
+ case RuneTypeArrayComma: // waiting for ',' or ']'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ',':
+ par.replaceState(RuneTypeArrayBeg)
+ return RuneTypeArrayComma, nil
+ case ']':
+ par.popState()
+ return RuneTypeArrayEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c)
+ }
+ // string //////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeStringBeg: // waiting for char or '"'
+ switch {
+ case c == '\\':
+ return par.replaceState(RuneTypeStringEsc), nil
+ case c == '"':
+ par.popState()
+ return RuneTypeStringEnd, nil
+ case 0x0020 <= c && c <= 0x10FFFF:
+ return RuneTypeStringChar, nil
+ default:
+ return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c)
+ }
+ case RuneTypeStringEsc: // waiting for escape char
+ switch c {
+ case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEsc1, nil
+ case 'u':
+ return par.replaceState(RuneTypeStringEscU), nil
+ default:
+ return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscU:
+ if _, ok := hex2int(c); ok {
+ return par.replaceState(RuneTypeStringEscUA), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUA:
+ if _, ok := hex2int(c); ok {
+ return par.replaceState(RuneTypeStringEscUB), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUB:
+ if _, ok := hex2int(c); ok {
+ return par.replaceState(RuneTypeStringEscUC), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUC:
+ if _, ok := hex2int(c); ok {
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEscUD, nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ // number //////////////////////////////////////////////////////////////////////////////////
+ //
+ // Here's a flattened drawing of the syntax diagram from www.json.org :
+ //
+ //  [------------ integer ----------][-- fraction ---][-------- exponent -------]
+ // >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─>
+ //   │     │ │           │         │  │             │  │                       │
+ //   ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯  ╰─"."─╭digit╮─╯  ╰─"e"─╭─╮─────╭─╭digit╮─╯
+ //                         ╰──<──╯          ╰──<──╯    │     │ │     │ ╰──<──╯
+ //                                                     ╰─"E"─╯ ╰─"-"─╯
+ //                                                             │     │
+ //                                                             ╰─"+"─╯
+ //
+ // Now here it is slightly redrawn, and with each distinct state our
+ // parser can be in marked with a single-capital-letter:
+ //
+ //   [-------------- integer ------------][--------- fraction --------][--------- exponent ---------]
+ // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─>
+ //     │       │  │               │         │                  │         │                          │
+ //     ╰─"-"─B─╯  ╰─digit 1-9─╭─D─╯─digit╮  ╰─"."─E─digit──╭─F─╯─digit╮  ╰─"e"─╭─G─╮─────╭─╭digit─I─╯
+ //                            ╰────<─────╯                 ╰────<─────╯  │     │   │     H ╰────<───╯
+ //                                                                       ╰─"E"─╯   ╰─"-"─╯
+ //                                                                                 │     │
+ //                                                                                 ╰─"+"─╯
+ //
+ // You may notice that each of these states may be uniquely identified
+ // by the last-read RuneType:
+ //
+ // A = (nothing yet)
+ // B = IntNeg
+ // C = IntZero
+ // D = IntDig
+ // E = FracDot
+ // F = FracDig
+ // G = ExpE
+ // H = ExpSign
+ // I = ExpDig
+ //
+ // The 'A' state is part of the RuneTypeError "any" case
+ // above, and the remainder follow:
+ case RuneTypeNumberIntNeg: // B
+ switch c {
+ case '0':
+ return par.replaceState(RuneTypeNumberIntZero), nil
+ case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
+ }
+ case RuneTypeNumberIntZero: // C
+ switch c {
+ case '.':
+ return par.replaceState(RuneTypeNumberFracDot), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.handleRune(c)
+ }
+ case RuneTypeNumberIntDig: // D
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ case '.':
+ return par.replaceState(RuneTypeNumberFracDot), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.handleRune(c)
+ }
+ case RuneTypeNumberFracDot: // E
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberFracDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c)
+ }
+ case RuneTypeNumberFracDig: // F
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberFracDig), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.handleRune(c)
+ }
+ case RuneTypeNumberExpE: // G
+ switch c {
+ case '-', '+':
+ return par.replaceState(RuneTypeNumberExpSign), nil
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c)
+ }
+ case RuneTypeNumberExpSign: // H
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c)
+ }
+ case RuneTypeNumberExpDig: // I
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ par.popState()
+ return par.handleRune(c)
+ }
+ // literals ////////////////////////////////////////////////////////////////////////////////
+ // true
+ case RuneTypeTrueT:
+ return par.expectRune(c, 'r', RuneTypeTrueR, "true", false)
+ case RuneTypeTrueR:
+ return par.expectRune(c, 'u', RuneTypeTrueU, "true", false)
+ case RuneTypeTrueU:
+ return par.expectRune(c, 'e', RuneTypeTrueE, "true", true)
+ // false
+ case RuneTypeFalseF:
+ return par.expectRune(c, 'a', RuneTypeFalseA, "false", false)
+ case RuneTypeFalseA:
+ return par.expectRune(c, 'l', RuneTypeFalseL, "false", false)
+ case RuneTypeFalseL:
+ return par.expectRune(c, 's', RuneTypeFalseS, "false", false)
+ case RuneTypeFalseS:
+ return par.expectRune(c, 'e', RuneTypeFalseE, "false", true)
+ // null
+ case RuneTypeNullN:
+ return par.expectRune(c, 'u', RuneTypeNullU, "null", false)
+ case RuneTypeNullU:
+ return par.expectRune(c, 'l', RuneTypeNullL1, "null", false)
+ case RuneTypeNullL1:
+ return par.expectRune(c, 'l', RuneTypeNullL2, "null", true)
default:
- par.popState()
- return par.state(c)
+ panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString()))
}
}
-// state: literals /////////////////////////////////////////////////////////////////////////////////
-
-func (par *parser) l(c rune, full string, exp rune, typ RuneType, nxt parseState) (RuneType, error) {
+func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
if c != exp {
- return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", full, c)
+ return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", context, c)
}
- if nxt == nil {
+ if pop {
par.popState()
+ return typ, nil
} else {
- par.replaceState(nxt, false)
+ return par.replaceState(typ), nil
}
- return typ, nil
-}
-
-func (par *parser) stateTrueT(c rune) (RuneType, error) {
- return par.l(c, "true", 'r', RuneTypeTrueR, par.stateTrueR)
-}
-func (par *parser) stateTrueR(c rune) (RuneType, error) {
- return par.l(c, "true", 'u', RuneTypeTrueU, par.stateTrueU)
-}
-func (par *parser) stateTrueU(c rune) (RuneType, error) {
- return par.l(c, "true", 'e', RuneTypeTrueR, nil)
-}
-
-func (par *parser) stateFalseF(c rune) (RuneType, error) {
- return par.l(c, "false", 'a', RuneTypeFalseA, par.stateFalseA)
-}
-func (par *parser) stateFalseA(c rune) (RuneType, error) {
- return par.l(c, "false", 'l', RuneTypeFalseL, par.stateFalseL)
-}
-func (par *parser) stateFalseL(c rune) (RuneType, error) {
- return par.l(c, "false", 's', RuneTypeFalseS, par.stateFalseS)
-}
-func (par *parser) stateFalseS(c rune) (RuneType, error) {
- return par.l(c, "false", 'e', RuneTypeFalseE, nil)
-}
-
-func (par *parser) stateNullN(c rune) (RuneType, error) {
- return par.l(c, "null", 'u', RuneTypeNullU, par.stateNullU)
-}
-func (par *parser) stateNullU(c rune) (RuneType, error) {
- return par.l(c, "null", 'l', RuneTypeNullL1, par.stateNullL)
-}
-func (par *parser) stateNullL(c rune) (RuneType, error) {
- return par.l(c, "null", 'l', RuneTypeNullL2, nil)
}