summaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-01-26 00:07:39 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-01-26 00:08:56 -0700
commit4148776399cb7ea5e10c74dc465e4e1e682cb399 (patch)
treedaf8a3aa6521d3d56aa986f10c3d23ea1b016f4b /internal
parente483afa206686ce748ad270140f99fad9d713aad (diff)
Move the Parser to the internal package
Diffstat (limited to 'internal')
-rw-r--r--internal/export_tags.go (renamed from internal/export.go)0
-rw-r--r--internal/hex.go20
-rw-r--r--internal/parse.go690
3 files changed, 710 insertions, 0 deletions
diff --git a/internal/export.go b/internal/export_tags.go
index d8cf622..d8cf622 100644
--- a/internal/export.go
+++ b/internal/export_tags.go
diff --git a/internal/hex.go b/internal/hex.go
new file mode 100644
index 0000000..9ef78eb
--- /dev/null
+++ b/internal/hex.go
@@ -0,0 +1,20 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package internal
+
+const Hex = "0123456789abcdef"
+
+func HexToInt[T interface{ byte | rune }](c T) (byte, bool) {
+ switch {
+ case '0' <= c && c <= '9':
+ return byte(c) - '0', true
+ case 'a' <= c && c <= 'f':
+ return byte(c) - 'a' + 10, true
+ case 'A' <= c && c <= 'F':
+ return byte(c) - 'A' + 10, true
+ default:
+ return 0, false
+ }
+}
diff --git a/internal/parse.go b/internal/parse.go
new file mode 100644
index 0000000..12d7600
--- /dev/null
+++ b/internal/parse.go
@@ -0,0 +1,690 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package internal
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ iofs "io/fs"
+ "strings"
+)
+
+var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
+
+type RuneType uint8
+
+const (
+ RuneTypeError = RuneType(iota)
+
+ RuneTypeSpace // whitespace
+
+ RuneTypeObjectBeg // '{'
+ RuneTypeObjectColon // ':'
+ RuneTypeObjectComma // ','
+ RuneTypeObjectEnd // '}'
+
+ RuneTypeArrayBeg // '['
+ RuneTypeArrayComma // ','
+ RuneTypeArrayEnd // ']'
+
+ RuneTypeStringBeg // opening '"'
+ RuneTypeStringChar // normal character
+ RuneTypeStringEsc // backslash
+ RuneTypeStringEsc1 // single-char after a backslash
+ RuneTypeStringEscU // \uABCD : u
+ RuneTypeStringEscUA // \uABCD : A
+ RuneTypeStringEscUB // \uABCD : B
+ RuneTypeStringEscUC // \uABCD : C
+ RuneTypeStringEscUD // \uABCD : D
+ RuneTypeStringEnd // closing '"'
+
+ RuneTypeNumberIntNeg
+ RuneTypeNumberIntZero
+ RuneTypeNumberIntDig
+ RuneTypeNumberFracDot
+ RuneTypeNumberFracDig
+ RuneTypeNumberExpE
+ RuneTypeNumberExpSign
+ RuneTypeNumberExpDig
+
+ RuneTypeTrueT
+ RuneTypeTrueR
+ RuneTypeTrueU
+ RuneTypeTrueE
+
+ RuneTypeFalseF
+ RuneTypeFalseA
+ RuneTypeFalseL
+ RuneTypeFalseS
+ RuneTypeFalseE
+
+ RuneTypeNullN
+ RuneTypeNullU
+ RuneTypeNullL1
+ RuneTypeNullL2
+
+ RuneTypeEOF
+)
+
+func (t RuneType) GoString() string {
+ str, ok := map[RuneType]string{
+ RuneTypeError: "RuneTypeError",
+
+ RuneTypeSpace: "RuneTypeSpace",
+
+ RuneTypeObjectBeg: "RuneTypeObjectBeg",
+ RuneTypeObjectColon: "RuneTypeObjectColon",
+ RuneTypeObjectComma: "RuneTypeObjectComma",
+ RuneTypeObjectEnd: "RuneTypeObjectEnd",
+
+ RuneTypeArrayBeg: "RuneTypeArrayBeg",
+ RuneTypeArrayComma: "RuneTypeArrayComma",
+ RuneTypeArrayEnd: "RuneTypeArrayEnd",
+
+ RuneTypeStringBeg: "RuneTypeStringBeg",
+ RuneTypeStringChar: "RuneTypeStringChar",
+ RuneTypeStringEsc: "RuneTypeStringEsc",
+ RuneTypeStringEsc1: "RuneTypeStringEsc1",
+ RuneTypeStringEscU: "RuneTypeStringEscU",
+ RuneTypeStringEscUA: "RuneTypeStringEscUA",
+ RuneTypeStringEscUB: "RuneTypeStringEscUB",
+ RuneTypeStringEscUC: "RuneTypeStringEscUC",
+ RuneTypeStringEscUD: "RuneTypeStringEscUD",
+ RuneTypeStringEnd: "RuneTypeStringEnd",
+
+ RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg",
+ RuneTypeNumberIntZero: "RuneTypeNumberIntZero",
+ RuneTypeNumberIntDig: "RuneTypeNumberIntDig",
+ RuneTypeNumberFracDot: "RuneTypeNumberFracDot",
+ RuneTypeNumberFracDig: "RuneTypeNumberFracDig",
+ RuneTypeNumberExpE: "RuneTypeNumberExpE",
+ RuneTypeNumberExpSign: "RuneTypeNumberExpSign",
+ RuneTypeNumberExpDig: "RuneTypeNumberExpDig",
+
+ RuneTypeTrueT: "RuneTypeTrueT",
+ RuneTypeTrueR: "RuneTypeTrueR",
+ RuneTypeTrueU: "RuneTypeTrueU",
+ RuneTypeTrueE: "RuneTypeTrueE",
+
+ RuneTypeFalseF: "RuneTypeFalseF",
+ RuneTypeFalseA: "RuneTypeFalseA",
+ RuneTypeFalseL: "RuneTypeFalseL",
+ RuneTypeFalseS: "RuneTypeFalseS",
+ RuneTypeFalseE: "RuneTypeFalseE",
+
+ RuneTypeNullN: "RuneTypeNullN",
+ RuneTypeNullU: "RuneTypeNullU",
+ RuneTypeNullL1: "RuneTypeNullL1",
+ RuneTypeNullL2: "RuneTypeNullL2",
+
+ RuneTypeEOF: "RuneTypeEOF",
+ }[t]
+ if ok {
+ return str
+ }
+ return fmt.Sprintf("RuneType(%d)", t)
+}
+
+func (t RuneType) String() string {
+ str, ok := map[RuneType]string{
+ RuneTypeError: "x",
+
+ RuneTypeSpace: " ",
+
+ RuneTypeObjectBeg: "{",
+ RuneTypeObjectColon: ":",
+ RuneTypeObjectComma: "o",
+ RuneTypeObjectEnd: "}",
+
+ RuneTypeArrayBeg: "[",
+ RuneTypeArrayComma: "a",
+ RuneTypeArrayEnd: "]",
+
+ RuneTypeStringBeg: "โ€œ",
+ RuneTypeStringChar: "c",
+ RuneTypeStringEsc: "\\",
+ RuneTypeStringEsc1: "b",
+ RuneTypeStringEscU: "u",
+ RuneTypeStringEscUA: "A",
+ RuneTypeStringEscUB: "B",
+ RuneTypeStringEscUC: "C",
+ RuneTypeStringEscUD: "D",
+ RuneTypeStringEnd: "โ€",
+
+ RuneTypeNumberIntNeg: "-",
+ RuneTypeNumberIntZero: "0",
+ RuneTypeNumberIntDig: "1",
+ RuneTypeNumberFracDot: ".",
+ RuneTypeNumberFracDig: "2",
+ RuneTypeNumberExpE: "e",
+ RuneTypeNumberExpSign: "+",
+ RuneTypeNumberExpDig: "3",
+
+ RuneTypeTrueT: "๐•ฅ", // double-struck
+ RuneTypeTrueR: "๐•ฃ",
+ RuneTypeTrueU: "๐•ฆ",
+ RuneTypeTrueE: "๐•–",
+
+ RuneTypeFalseF: "๐”ฃ", // fraktur
+ RuneTypeFalseA: "๐”ž",
+ RuneTypeFalseL: "๐”ฉ",
+ RuneTypeFalseS: "๐”ฐ",
+ RuneTypeFalseE: "๐”ข",
+
+ RuneTypeNullN: "โ“", // circled
+ RuneTypeNullU: "โ“ค",
+ RuneTypeNullL1: "โ“›",
+ RuneTypeNullL2: "โ“", // +uppercase
+
+ RuneTypeEOF: "$",
+ }[t]
+ if ok {
+ return str
+ }
+ return fmt.Sprintf("<%d>", t)
+}
+
+func (t RuneType) JSONType() string {
+ return map[RuneType]string{
+ RuneTypeObjectBeg: "object",
+ RuneTypeArrayBeg: "array",
+ RuneTypeStringBeg: "string",
+ RuneTypeNumberIntNeg: "number",
+ RuneTypeNumberIntZero: "number",
+ RuneTypeNumberIntDig: "number",
+ RuneTypeTrueT: "true",
+ RuneTypeFalseF: "false",
+ RuneTypeNullN: "null",
+ RuneTypeEOF: "eof",
+ }[t]
+}
+
+func (t RuneType) IsNumber() bool {
+ return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig
+}
+
+type Parser struct {
+ // Setting MaxError to a value greater than 0 causes
+ // HandleRune to return ErrParserExceededMaxDepth if
+ // objects/arrays become nested more deeply than this.
+ MaxDepth int
+
+ initialized bool
+
+ err error
+ closed bool
+
+ // We reuse RuneTypes to store the stack. The base idea is
+ // that, stack items are "the most recently read
+ // stack-relevant RuneType".
+ //
+ // We treat RuneTypeError as a wildcard.
+ //
+ // The "normal"stack-relevant RuneTypes are:
+ //
+ // โ€œ\uABC for strings
+ // -01.2e+3 for numbers
+ // ๐•ฅ๐•ฃ๐•ฆ for "true"
+ // ๐”ฃ๐”ž๐”ฉ๐”ฐ for "false"
+ // โ“โ“คโ“› for "null"
+ //
+ // Objects and arrays break the "most recently read RuneType"
+ // rule; they need some special assignments:
+ //
+ // { object: waiting for key to start or '}'
+ // โ€ object: reading key / waiting for colon
+ // : object: waiting for value to start
+ // o object: reading value / waiting for ',' or '}'
+ //
+ // [ array: waiting for item to start or ']'
+ // a array: reading item / waiting for ',' or ']'
+ // ] array: waiting for item to start
+ //
+ // Within each element type, the stack item is replaced, not pushed.
+ //
+ // For example, given the input string
+ //
+ // {"x":"y","a":"b"}
+ //
+ // The stack would be
+ //
+ // stack processed
+ // x
+ // { {
+ // โ€โ€œ {"
+ // โ€โ€œ {"x
+ // โ€ {"x"
+ // : {"x":
+ // oโ€œ {"x":"
+ // oโ€œ {"x":"y
+ // o {"x":"y"
+ // { {"x":"y",
+ // โ€โ€œ {"x":"y","
+ // โ€โ€œ {"x":"y","a
+ // โ€ {"x":"y","a"
+ // : {"x":"y","a":
+ // oโ€œ {"x":"y","a":"
+ // oโ€œ {"x":"y","a":"b
+ // o {"x":"y","a":"b"
+ // {"x":"y","a":"b"}
+ stack []RuneType
+}
+
+func (par *Parser) pushState(state RuneType) RuneType {
+ par.stack = append(par.stack, state)
+ return state
+}
+func (par *Parser) replaceState(state RuneType) RuneType {
+ par.stack[len(par.stack)-1] = state
+ return state
+}
+func (par *Parser) popState() {
+ par.stack = par.stack[:len(par.stack)-1]
+}
+
+func (par *Parser) stackString() string {
+ var buf strings.Builder
+ for _, s := range par.stack {
+ buf.WriteString(s.String())
+ }
+ return buf.String()
+}
+
+func (par *Parser) StackIsEmpty() bool {
+ return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == RuneTypeError)
+}
+
+// Reset all Parser state.
+func (par *Parser) Reset() {
+ *par = Parser{
+ MaxDepth: par.MaxDepth,
+ }
+}
+
+// HandleEOF feeds EOF to the Parser. The returned RuneType is either
+// RuneTypeEOF or RuneTypeError.
+//
+// An error is returned if and only if the RuneType is RuneTypeError.
+// Returns io/fs.ErrClosed if .HandleEOF() has previously been called
+// (and .Reset() has not been called since).
+//
+// Once RuneTypeError or RuneTypeEOF has been returned, it will keep
+// being returned from both .HandleRune(c) and .HandleEOF() until
+// .Reset() is called.
+//
+// RuneTypeEOF indicates that a complete JSON document has been read.
+func (par *Parser) HandleEOF() (RuneType, error) {
+ if par.closed {
+ return RuneTypeError, iofs.ErrClosed
+ }
+ defer func() {
+ par.closed = true
+ }()
+ if par.err != nil {
+ return RuneTypeError, par.err
+ }
+ if !par.initialized {
+ par.initialized = true
+ par.pushState(RuneTypeError)
+ }
+ switch len(par.stack) {
+ case 0:
+ return RuneTypeEOF, nil
+ case 1:
+ switch {
+ case par.stack[0].IsNumber():
+ if _, err := par.HandleRune('\n'); err == nil {
+ return RuneTypeEOF, nil
+ }
+ case par.stack[0] == RuneTypeError:
+ par.err = io.EOF
+ return RuneTypeError, par.err
+ }
+ fallthrough
+ default:
+ par.err = io.ErrUnexpectedEOF
+ return RuneTypeError, par.err
+ }
+}
+
+// HandleRune feeds a Unicode rune to the Parser.
+//
+// An error is returned if and only if the RuneType is RuneTypeError.
+// Returns io/fs.ErrClosed if .HandleEOF() has previously been called
+// (and .Reset() has not been called since).
+//
+// Once RuneTypeError or RuneTypeEOF has been returned, it will keep
+// being returned from both .HandleRune(c) and .HandleEOF() until
+// .Reset() is called.
+//
+// RuneTypeEOF indicates that the rune cannot be appended to the JSON
+// document; a new JSON document must be started in order to process
+// that rune.
+func (par *Parser) HandleRune(c rune) (RuneType, error) {
+ if par.closed {
+ return RuneTypeError, iofs.ErrClosed
+ }
+ if par.err != nil {
+ return RuneTypeError, par.err
+ }
+ if !par.initialized {
+ par.initialized = true
+ par.pushState(RuneTypeError)
+ }
+ if len(par.stack) == 0 {
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ default:
+ return RuneTypeEOF, nil
+ }
+ }
+ switch par.stack[len(par.stack)-1] {
+ // any /////////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeError:
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case '{':
+ if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth {
+ return RuneTypeError, ErrParserExceededMaxDepth
+ }
+ return par.replaceState(RuneTypeObjectBeg), nil
+ case '[':
+ if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth {
+ return RuneTypeError, ErrParserExceededMaxDepth
+ }
+ return par.replaceState(RuneTypeArrayBeg), nil
+ case '"':
+ return par.replaceState(RuneTypeStringBeg), nil
+ case '-':
+ return par.replaceState(RuneTypeNumberIntNeg), nil
+ case '0':
+ return par.replaceState(RuneTypeNumberIntZero), nil
+ case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ case 't':
+ return par.replaceState(RuneTypeTrueT), nil
+ case 'f':
+ return par.replaceState(RuneTypeFalseF), nil
+ case 'n':
+ return par.replaceState(RuneTypeNullN), nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c)
+ }
+ // object //////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeObjectBeg: // waiting for key to start or '}'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case '"':
+ par.replaceState(RuneTypeStringEnd)
+ return par.pushState(RuneTypeStringBeg), nil
+ case '}':
+ par.popState()
+ return RuneTypeObjectEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c)
+ }
+ case RuneTypeStringEnd: // waiting for ':'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ':':
+ par.replaceState(RuneTypeObjectComma)
+ par.pushState(RuneTypeError)
+ return RuneTypeObjectColon, nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q after object key", c)
+ }
+ case RuneTypeObjectComma: // waiting for ',' or '}'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ',':
+ par.replaceState(RuneTypeObjectBeg)
+ return RuneTypeObjectComma, nil
+ case '}':
+ par.popState()
+ return RuneTypeObjectEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c)
+ }
+ // array ///////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeArrayBeg: // waiting for item to start or ']'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ']':
+ par.popState()
+ return RuneTypeArrayEnd, nil
+ default:
+ par.replaceState(RuneTypeArrayComma)
+ par.pushState(RuneTypeError)
+ return par.HandleRune(c)
+ }
+ case RuneTypeArrayEnd: // waiting for item
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ default:
+ par.replaceState(RuneTypeArrayComma)
+ par.pushState(RuneTypeError)
+ return par.HandleRune(c)
+ }
+ case RuneTypeArrayComma: // waiting for ',' or ']'
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ case ',':
+ par.replaceState(RuneTypeArrayEnd)
+ return RuneTypeArrayComma, nil
+ case ']':
+ par.popState()
+ return RuneTypeArrayEnd, nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q after array element", c)
+ }
+ // string //////////////////////////////////////////////////////////////////////////////////
+ case RuneTypeStringBeg: // waiting for char or '"'
+ switch {
+ case c == '\\':
+ return par.replaceState(RuneTypeStringEsc), nil
+ case c == '"':
+ par.popState()
+ return RuneTypeStringEnd, nil
+ case 0x0020 <= c && c <= 0x10FFFF:
+ return RuneTypeStringChar, nil
+ default:
+ return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c)
+ }
+ case RuneTypeStringEsc: // waiting for escape char
+ switch c {
+ case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEsc1, nil
+ case 'u':
+ return par.replaceState(RuneTypeStringEscU), nil
+ default:
+ return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscU:
+ if _, ok := HexToInt(c); ok {
+ return par.replaceState(RuneTypeStringEscUA), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUA:
+ if _, ok := HexToInt(c); ok {
+ return par.replaceState(RuneTypeStringEscUB), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUB:
+ if _, ok := HexToInt(c); ok {
+ return par.replaceState(RuneTypeStringEscUC), nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ case RuneTypeStringEscUC:
+ if _, ok := HexToInt(c); ok {
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEscUD, nil
+ } else {
+ return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+ }
+ // number //////////////////////////////////////////////////////////////////////////////////
+ //
+ // Here's a flattened drawing of the syntax diagram from www.json.org :
+ //
+ // [------------ integer ----------][-- fraction ---][-------- exponent -------]
+ // >โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€>
+ // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚
+ // โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ
+ // โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ
+ // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ
+ // โ”‚ โ”‚
+ // โ•ฐโ”€"+"โ”€โ•ฏ
+ //
+ // Now here it is slightly redrawn, and with each distinct state our
+ // parser can be in marked with a single-capital-letter:
+ //
+ // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------]
+ // >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€>
+ // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚
+ // โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ
+ // โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ
+ // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ
+ // โ”‚ โ”‚
+ // โ•ฐโ”€"+"โ”€โ•ฏ
+ //
+ // You may notice that each of these states may be uniquely identified
+ // by the last-read RuneType:
+ //
+ // A = (nothing yet)
+ // B = IntNeg
+ // C = IntZero
+ // D = IntDig
+ // E = FracDot
+ // F = FracDig
+ // G = ExpE
+ // H = ExpSign
+ // I = ExpDig
+ //
+ // The 'A' state is part of the RuneTypeError "any" case
+ // above, and the remainder follow:
+ case RuneTypeNumberIntNeg: // B
+ switch c {
+ case '0':
+ return par.replaceState(RuneTypeNumberIntZero), nil
+ case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+ }
+ case RuneTypeNumberIntZero: // C
+ switch c {
+ case '.':
+ return par.replaceState(RuneTypeNumberFracDot), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.HandleRune(c)
+ }
+ case RuneTypeNumberIntDig: // D
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberIntDig), nil
+ case '.':
+ return par.replaceState(RuneTypeNumberFracDot), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.HandleRune(c)
+ }
+ case RuneTypeNumberFracDot: // E
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberFracDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+ }
+ case RuneTypeNumberFracDig: // F
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberFracDig), nil
+ case 'e', 'E':
+ return par.replaceState(RuneTypeNumberExpE), nil
+ default:
+ par.popState()
+ return par.HandleRune(c)
+ }
+ case RuneTypeNumberExpE: // G
+ switch c {
+ case '-', '+':
+ return par.replaceState(RuneTypeNumberExpSign), nil
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+ }
+ case RuneTypeNumberExpSign: // H
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+ }
+ case RuneTypeNumberExpDig: // I
+ switch c {
+ case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return par.replaceState(RuneTypeNumberExpDig), nil
+ default:
+ par.popState()
+ return par.HandleRune(c)
+ }
+ // literals ////////////////////////////////////////////////////////////////////////////////
+ // true
+ case RuneTypeTrueT:
+ return par.expectRune(c, 'r', RuneTypeTrueR, "true", false)
+ case RuneTypeTrueR:
+ return par.expectRune(c, 'u', RuneTypeTrueU, "true", false)
+ case RuneTypeTrueU:
+ return par.expectRune(c, 'e', RuneTypeTrueE, "true", true)
+ // false
+ case RuneTypeFalseF:
+ return par.expectRune(c, 'a', RuneTypeFalseA, "false", false)
+ case RuneTypeFalseA:
+ return par.expectRune(c, 'l', RuneTypeFalseL, "false", false)
+ case RuneTypeFalseL:
+ return par.expectRune(c, 's', RuneTypeFalseS, "false", false)
+ case RuneTypeFalseS:
+ return par.expectRune(c, 'e', RuneTypeFalseE, "false", true)
+ // null
+ case RuneTypeNullN:
+ return par.expectRune(c, 'u', RuneTypeNullU, "null", false)
+ case RuneTypeNullU:
+ return par.expectRune(c, 'l', RuneTypeNullL1, "null", false)
+ case RuneTypeNullL1:
+ return par.expectRune(c, 'l', RuneTypeNullL2, "null", true)
+ default:
+ panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString()))
+ }
+}
+
+func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
+ if c != exp {
+ return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp)
+ }
+ if pop {
+ par.popState()
+ return typ, nil
+ } else {
+ return par.replaceState(typ), nil
+ }
+}