From 4b00a61c33d6a448c59c5509c0a408f527308c8b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 13 Aug 2022 22:05:20 -0600 Subject: parse: Rework to avoid passing around function pointers "Ignore whitespace" is probably essential for viewing this patch. --- misc.go | 13 ++ parse.go | 776 +++++++++++++++++++++++++++++++++------------------------------ 2 files changed, 416 insertions(+), 373 deletions(-) diff --git a/misc.go b/misc.go index 132b177..a567cc7 100644 --- a/misc.go +++ b/misc.go @@ -15,6 +15,19 @@ const Tab = "\t" const hex = "0123456789abcdef" +func hex2int[T interface{ byte | rune }](c T) (byte, bool) { + switch { + case '0' <= c && c <= '9': + return byte(c) - '0', true + case 'a' <= c && c <= 'f': + return byte(c) - 'a' + 10, true + case 'A' <= c && c <= 'F': + return byte(c) - 'A' + 10, true + default: + return 0, false + } +} + var ( numberType = reflect.TypeOf(json.Number("")) byteType = reflect.TypeOf(byte(0)) diff --git a/parse.go b/parse.go index a1c5472..d4b55eb 100644 --- a/parse.go +++ b/parse.go @@ -8,6 +8,7 @@ import ( "fmt" "io" iofs "io/fs" + "strings" ) type RuneType uint8 @@ -37,9 +38,14 @@ const ( RuneTypeStringEscUD // \uABCD : D RuneTypeStringEnd // closing '"' - RuneTypeNumberInt // 0|[1-9][0-9]* - RuneTypeNumberFrac // \.[0-9]* - RuneTypeNumberExp // [eE][-+]?[0-9] + RuneTypeNumberIntNeg + RuneTypeNumberIntZero + RuneTypeNumberIntDig + RuneTypeNumberFracDot + RuneTypeNumberFracDig + RuneTypeNumberExpE + RuneTypeNumberExpSign + RuneTypeNumberExpDig RuneTypeTrueT RuneTypeTrueR @@ -58,29 +64,118 @@ const ( RuneTypeNullL2 ) +func (t RuneType) String() string { + str, ok := map[RuneType]string{ + RuneTypeError: "x", + + RuneTypeSpace: " ", + + RuneTypeObjectBeg: "{", + RuneTypeObjectColon: ":", + RuneTypeObjectComma: "o", + RuneTypeObjectEnd: "}", + + RuneTypeArrayBeg: "[", + RuneTypeArrayComma: "a", + RuneTypeArrayEnd: "]", + + RuneTypeStringBeg: "โ€œ", + RuneTypeStringChar: "c", + RuneTypeStringEsc: "\\", + RuneTypeStringEsc1: "b", + RuneTypeStringEscU: "u", + RuneTypeStringEscUA: "A", + RuneTypeStringEscUB: "B", + RuneTypeStringEscUC: "C", + RuneTypeStringEscUD: "D", + RuneTypeStringEnd: "โ€", + + RuneTypeNumberIntNeg: "-", + RuneTypeNumberIntZero: "0", + RuneTypeNumberIntDig: "1", + RuneTypeNumberFracDot: ".", + RuneTypeNumberFracDig: "2", + RuneTypeNumberExpE: "e", + RuneTypeNumberExpSign: "+", + RuneTypeNumberExpDig: "3", + + RuneTypeTrueT: "๐•ฅ", // double-struck + RuneTypeTrueR: "๐•ฃ", + RuneTypeTrueU: "๐•ฆ", + RuneTypeTrueE: "๐•–", + + RuneTypeFalseF: "๐”ฃ", // fraktur + RuneTypeFalseA: "๐”ž", + RuneTypeFalseL: "๐”ฉ", + RuneTypeFalseS: "๐”ฐ", + RuneTypeFalseE: "๐”ข", + + RuneTypeNullN: "โ“", // circled + RuneTypeNullU: "โ“ค", + RuneTypeNullL1: "โ“›", + RuneTypeNullL2: "โ“", // +uppercase + }[t] + if ok { + return str + } + return fmt.Sprintf("<%d>", t) +} + +func (t RuneType) IsNumber() bool { + return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig +} + +// { waiting for key to start or '}' +// โ€ reading key / waiting for colon +// : waiting for value to start +// , reading value / waiting for ',' or '}' +// +// {"x":"y","a":"b"} +// +// { { +// โ€โ€œ {" +// โ€โ€œ {"x +// โ€ {"x" +// : {"x": +// oโ€œ {"x":" +// oโ€œ {"x":"y +// o {"x":"y" +// { {"x":"y", +// โ€โ€œ {"x":"y"," +// โ€โ€œ {"x":"y","a +// โ€ {"x":"y","a" +// : {"x":"y","a": +// oโ€œ {"x":" +// oโ€œ {"x":"y +// o {"x":"y" +// +// [ waiting for item to start or ']' +// a reading item / waiting for ',' or ']' + type parseState func(rune) (RuneType, error) -type parser struct { +type Parser struct { err error closed bool - stack []parseState - stack0IsNumber bool // whether stack[0] is a number-state; affects how EOF is handled + bailAfterCurrent bool // bad hack + + stack []RuneType } -// "public" API //////////////////////////////////////////////////////////////////////////////////// +// public API ////////////////////////////////////////////////////////////////////////////////////// -func (par *parser) HandleRune(c rune) (RuneType, error) { +func (par *Parser) HandleRune(c rune) (typ RuneType, err error) { if par.closed { return RuneTypeError, iofs.ErrClosed } if par.err != nil { return RuneTypeError, par.err } - return par.state(c) + return par.handleRune(c) } -func (par *parser) HandleEOF() error { +func (par *Parser) HandleEOF() error { if par.closed { return iofs.ErrClosed } @@ -89,8 +184,9 @@ func (par *parser) HandleEOF() error { case 0: par.err = nil case 1: - if par.stack0IsNumber { - _, par.err = par.state('\n') + if par.stack[0].IsNumber() { + _, par.err = par.handleRune('\n') + break } fallthrough default: @@ -101,386 +197,320 @@ func (par *parser) HandleEOF() error { return par.err } -// state helpers /////////////////////////////////////////////////////////////////////////////////// +// internal //////////////////////////////////////////////////////////////////////////////////////// -func (par *parser) pushState(state parseState, isNumber bool) { - if len(par.stack) == 0 { - par.stack0IsNumber = isNumber - } +func (par *Parser) pushState(state RuneType) RuneType { par.stack = append(par.stack, state) + return state } -func (par *parser) replaceState(state parseState, isNumber bool) { - if len(par.stack) == 1 { - par.stack0IsNumber = isNumber - } +func (par *Parser) replaceState(state RuneType) RuneType { par.stack[len(par.stack)-1] = state + return state } -func (par *parser) popState() { - if len(par.stack) == 1 { - par.stack0IsNumber = false - } +func (par *Parser) popState() { par.stack = par.stack[:len(par.stack)-1] } -func (par *parser) state(c rune) (RuneType, error) { - if len(par.stack) == 0 { - par.pushState(par.stateAny, false) - } - return par.stack[len(par.stack)-1](c) -} - -// state: any ////////////////////////////////////////////////////////////////////////////////////// - -func (par *parser) stateAny(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '{': - par.replaceState(par.stateInObject, false) - return RuneTypeObjectBeg, nil - case '[': - par.replaceState(par.stateInArray, false) - return RuneTypeArrayBeg, nil - case '"': - par.replaceState(par.stateInString, false) - return RuneTypeStringBeg, nil - case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberA, true) - return par.state(c) - case 't': - par.replaceState(par.stateTrueT, false) - return RuneTypeTrueT, nil - case 'f': - par.replaceState(par.stateFalseF, false) - return RuneTypeFalseF, nil - case 'n': - par.replaceState(par.stateNullN, false) - return RuneTypeNullN, nil - default: - return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c) - } -} - -// state: object /////////////////////////////////////////////////////////////////////////////////// - -func (par *parser) stateInObject(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '"': - par.replaceState(par.stateAfterK, false) - par.pushState(par.stateInString, false) - return RuneTypeStringBeg, nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) - } -} -func (par *parser) stateAfterK(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ':': - par.replaceState(par.stateAfterV, false) - par.pushState(par.stateAny, false) - return RuneTypeObjectColon, nil - default: - return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) - } -} -func (par *parser) stateAfterV(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(par.stateInObject, false) - return RuneTypeObjectComma, nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) - } -} - -// state: array //////////////////////////////////////////////////////////////////////////////////// - -func (par *parser) stateInArray(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - par.replaceState(par.stateAfterItem, false) - par.pushState(par.stateAny, false) - return par.state(c) - } -} -func (par *parser) stateAfterItem(c rune) (RuneType, error) { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(par.stateInArray, false) - return RuneTypeArrayComma, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c) - } -} - -// state: string /////////////////////////////////////////////////////////////////////////////////// - -func (par *parser) stateInString(c rune) (RuneType, error) { - switch { - case c == '\\': - par.replaceState(par.stateInEsc, false) - return RuneTypeStringEsc, nil - case c == '"': - par.popState() - return RuneTypeStringEnd, nil - case 0x0020 <= c && c <= 0x10FFFF: - return RuneTypeStringChar, nil - default: - return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) - } -} -func (par *parser) stateInEsc(c rune) (RuneType, error) { - switch c { - case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': - par.replaceState(par.stateInString, false) - return RuneTypeStringEsc1, nil - case 'u': - par.replaceState(par.stateInEscU, false) - return RuneTypeStringEscU, nil - default: - return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) - } -} -func (par *parser) _stateInEscU(c rune, typ RuneType, nxt parseState) (RuneType, error) { - switch { - case ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'): - par.replaceState(nxt, false) - return typ, nil - default: - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } -} -func (par *parser) stateInEscU(c rune) (RuneType, error) { - return par._stateInEscU(c, RuneTypeStringEscUA, par.stateInEscUA) -} -func (par *parser) stateInEscUA(c rune) (RuneType, error) { - return par._stateInEscU(c, RuneTypeStringEscUB, par.stateInEscUB) -} -func (par *parser) stateInEscUB(c rune) (RuneType, error) { - return par._stateInEscU(c, RuneTypeStringEscUC, par.stateInEscUC) -} -func (par *parser) stateInEscUC(c rune) (RuneType, error) { - return par._stateInEscU(c, RuneTypeStringEscUD, par.stateInString) -} - -// state: number /////////////////////////////////////////////////////////////////////////////////// - -// Here's a flattened drawing of the syntax diagram from www.json.org : -// -// [------------ integer ----------][-- fraction ---][-------- exponent -------] -// >โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> -// โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ -// โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ -// โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ -// โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ -// โ”‚ โ”‚ -// โ•ฐโ”€"+"โ”€โ•ฏ -// -// Now here it is slightly redrawn, and with each distinct state our -// parser can be in marked with a single-capital-letter: -// -// [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] -// >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> -// โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ -// โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ -// โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ -// โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ -// โ”‚ โ”‚ -// โ•ฐโ”€"+"โ”€โ•ฏ -// -// Which state we're at is the 'X' in 'stateNumberX'. -// -// It may be worth noting that these states, if we're going to try to -// assign meaningful names, are perhaps best named by the type of the -// preceding character: -// -// A = (nothing yet) -// B = IntNeg -// C = IntZero -// D = IntDig -// E = FracDot -// F = FracDig -// G = ExpE -// H = ExpSign -// I = ExpDig - -// number: integer-part //////////////////////////////////////////////////////// -func (par *parser) stateNumberA(c rune) (RuneType, error) { // start - switch c { - case '-': - par.replaceState(par.stateNumberB, true) - return RuneTypeNumberInt, nil - case '0': - par.replaceState(par.stateNumberC, true) - return RuneTypeNumberInt, nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberD, true) - return RuneTypeNumberInt, nil - default: - return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) - } -} -func (par *parser) stateNumberB(c rune) (RuneType, error) { // got a leading "-" - switch c { - case '0': - par.replaceState(par.stateNumberC, true) - return RuneTypeNumberInt, nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberD, true) - return RuneTypeNumberInt, nil - default: - return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) - } -} -func (par *parser) stateNumberC(c rune) (RuneType, error) { // ready for the fraction or exponent part to start - switch c { - case '.': - par.replaceState(par.stateNumberE, true) - return RuneTypeNumberFrac, nil - case 'e', 'E': - par.replaceState(par.stateNumberG, true) - return RuneTypeNumberExp, nil - default: - par.popState() - return par.state(c) - } -} -func (par *parser) stateNumberD(c rune) (RuneType, error) { // in the integer part - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return RuneTypeNumberInt, nil - case '.': - par.replaceState(par.stateNumberE, true) - return RuneTypeNumberFrac, nil - case 'e', 'E': - par.replaceState(par.stateNumberG, true) - return RuneTypeNumberExp, nil - default: - par.popState() - return par.state(c) - } -} - -// number: fraction-part /////////////////////////////////////////////////////// -func (par *parser) stateNumberE(c rune) (RuneType, error) { // got a ".", ready to read a number for the fraction part - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberF, true) - return RuneTypeNumberFrac, nil - default: - return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) - } -} -func (par *parser) stateNumberF(c rune) (RuneType, error) { // in the fraction part - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return RuneTypeNumberFrac, nil - case 'e', 'E': - par.replaceState(par.stateNumberG, true) - return RuneTypeNumberExp, nil - default: - par.popState() - return par.state(c) +func (par *Parser) stackString() string { + var buf strings.Builder + for _, s := range par.stack { + buf.WriteString(s.String()) } + return buf.String() } -// number: exponent-part /////////////////////////////////////////////////////// -func (par *parser) stateNumberG(c rune) (RuneType, error) { // got a leading "e" - switch c { - case '-', '+': - par.replaceState(par.stateNumberH, true) - return RuneTypeNumberExp, nil - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberI, true) - return RuneTypeNumberExp, nil - default: - return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) - } -} -func (par *parser) stateNumberH(c rune) (RuneType, error) { // got a + or - sign - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - par.replaceState(par.stateNumberI, true) - return RuneTypeNumberExp, nil - default: - return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) +func (par *Parser) handleRune(c rune) (RuneType, error) { + if len(par.stack) == 0 { + par.pushState(RuneTypeError) } -} -func (par *parser) stateNumberI(c rune) (RuneType, error) { // in the exponent's number part - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return RuneTypeNumberExp, nil + switch par.stack[len(par.stack)-1] { + // any ///////////////////////////////////////////////////////////////////////////////////// + case RuneTypeError: + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '{': + return par.replaceState(RuneTypeObjectBeg), nil + case '[': + return par.replaceState(RuneTypeArrayBeg), nil + case '"': + return par.replaceState(RuneTypeStringBeg), nil + case '-': + return par.replaceState(RuneTypeNumberIntNeg), nil + case '0': + return par.replaceState(RuneTypeNumberIntZero), nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case 't': + return par.replaceState(RuneTypeTrueT), nil + case 'f': + return par.replaceState(RuneTypeFalseF), nil + case 'n': + return par.replaceState(RuneTypeNullN), nil + default: + return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c) + } + // object ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeObjectBeg: // waiting for key to start or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '"': + par.replaceState(RuneTypeStringEnd) + return par.pushState(RuneTypeStringBeg), nil + case '}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) + } + case RuneTypeStringEnd: // waiting for ':' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ':': + par.replaceState(RuneTypeObjectComma) + par.pushState(RuneTypeError) + return RuneTypeObjectColon, nil + default: + return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) + } + case RuneTypeObjectComma: // waiting for ',' or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.replaceState(RuneTypeObjectBeg) + return RuneTypeObjectComma, nil + case '}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) + } + // array /////////////////////////////////////////////////////////////////////////////////// + case RuneTypeArrayBeg: // waiting for item to start or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + par.replaceState(RuneTypeArrayComma) + par.pushState(RuneTypeError) + return par.handleRune(c) + } + case RuneTypeArrayComma: // waiting for ',' or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.replaceState(RuneTypeArrayBeg) + return RuneTypeArrayComma, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c) + } + // string ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeStringBeg: // waiting for char or '"' + switch { + case c == '\\': + return par.replaceState(RuneTypeStringEsc), nil + case c == '"': + par.popState() + return RuneTypeStringEnd, nil + case 0x0020 <= c && c <= 0x10FFFF: + return RuneTypeStringChar, nil + default: + return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) + } + case RuneTypeStringEsc: // waiting for escape char + switch c { + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEsc1, nil + case 'u': + return par.replaceState(RuneTypeStringEscU), nil + default: + return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) + } + case RuneTypeStringEscU: + if _, ok := hex2int(c); ok { + return par.replaceState(RuneTypeStringEscUA), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUA: + if _, ok := hex2int(c); ok { + return par.replaceState(RuneTypeStringEscUB), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUB: + if _, ok := hex2int(c); ok { + return par.replaceState(RuneTypeStringEscUC), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUC: + if _, ok := hex2int(c); ok { + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + // number ////////////////////////////////////////////////////////////////////////////////// + // + // Here's a flattened drawing of the syntax diagram from www.json.org : + // + // [------------ integer ----------][-- fraction ---][-------- exponent -------] + // >โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> + // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ + // โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ + // โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ + // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ + // โ”‚ โ”‚ + // โ•ฐโ”€"+"โ”€โ•ฏ + // + // Now here it is slightly redrawn, and with each distinct state our + // parser can be in marked with a single-capital-letter: + // + // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] + // >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> + // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ + // โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ + // โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ + // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ + // โ”‚ โ”‚ + // โ•ฐโ”€"+"โ”€โ•ฏ + // + // You may notice that each of these states may be uniquely identified + // by the last-read RuneType: + // + // A = (nothing yet) + // B = IntNeg + // C = IntZero + // D = IntDig + // E = FracDot + // F = FracDig + // G = ExpE + // H = ExpSign + // I = ExpDig + // + // The 'A' state is part of the RuneTypeError "any" case + // above, and the remainder follow: + case RuneTypeNumberIntNeg: // B + switch c { + case '0': + return par.replaceState(RuneTypeNumberIntZero), nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + default: + return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) + } + case RuneTypeNumberIntZero: // C + switch c { + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.handleRune(c) + } + case RuneTypeNumberIntDig: // D + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.handleRune(c) + } + case RuneTypeNumberFracDot: // E + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + default: + return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) + } + case RuneTypeNumberFracDig: // F + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.handleRune(c) + } + case RuneTypeNumberExpE: // G + switch c { + case '-', '+': + return par.replaceState(RuneTypeNumberExpSign), nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) + } + case RuneTypeNumberExpSign: // H + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) + } + case RuneTypeNumberExpDig: // I + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + par.popState() + return par.handleRune(c) + } + // literals //////////////////////////////////////////////////////////////////////////////// + // true + case RuneTypeTrueT: + return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) + case RuneTypeTrueR: + return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) + case RuneTypeTrueU: + return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) + // false + case RuneTypeFalseF: + return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) + case RuneTypeFalseA: + return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) + case RuneTypeFalseL: + return par.expectRune(c, 's', RuneTypeFalseS, "false", false) + case RuneTypeFalseS: + return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) + // null + case RuneTypeNullN: + return par.expectRune(c, 'u', RuneTypeNullU, "null", false) + case RuneTypeNullU: + return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) + case RuneTypeNullL1: + return par.expectRune(c, 'l', RuneTypeNullL2, "null", true) default: - par.popState() - return par.state(c) + panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) } } -// state: literals ///////////////////////////////////////////////////////////////////////////////// - -func (par *parser) l(c rune, full string, exp rune, typ RuneType, nxt parseState) (RuneType, error) { +func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { if c != exp { - return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", full, c) + return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", context, c) } - if nxt == nil { + if pop { par.popState() + return typ, nil } else { - par.replaceState(nxt, false) + return par.replaceState(typ), nil } - return typ, nil -} - -func (par *parser) stateTrueT(c rune) (RuneType, error) { - return par.l(c, "true", 'r', RuneTypeTrueR, par.stateTrueR) -} -func (par *parser) stateTrueR(c rune) (RuneType, error) { - return par.l(c, "true", 'u', RuneTypeTrueU, par.stateTrueU) -} -func (par *parser) stateTrueU(c rune) (RuneType, error) { - return par.l(c, "true", 'e', RuneTypeTrueR, nil) -} - -func (par *parser) stateFalseF(c rune) (RuneType, error) { - return par.l(c, "false", 'a', RuneTypeFalseA, par.stateFalseA) -} -func (par *parser) stateFalseA(c rune) (RuneType, error) { - return par.l(c, "false", 'l', RuneTypeFalseL, par.stateFalseL) -} -func (par *parser) stateFalseL(c rune) (RuneType, error) { - return par.l(c, "false", 's', RuneTypeFalseS, par.stateFalseS) -} -func (par *parser) stateFalseS(c rune) (RuneType, error) { - return par.l(c, "false", 'e', RuneTypeFalseE, nil) -} - -func (par *parser) stateNullN(c rune) (RuneType, error) { - return par.l(c, "null", 'u', RuneTypeNullU, par.stateNullU) -} -func (par *parser) stateNullU(c rune) (RuneType, error) { - return par.l(c, "null", 'l', RuneTypeNullL1, par.stateNullL) -} -func (par *parser) stateNullL(c rune) (RuneType, error) { - return par.l(c, "null", 'l', RuneTypeNullL2, nil) } -- cgit v1.1-4-g5e80