From 2b9473f5e8816eeea76b2fdada184532be00d3a2 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 7 Feb 2023 12:18:29 -0700 Subject: internal: Split in to sub-packages --- internal/parse.go | 845 ------------------------------------------------------ 1 file changed, 845 deletions(-) delete mode 100644 internal/parse.go (limited to 'internal/parse.go') diff --git a/internal/parse.go b/internal/parse.go deleted file mode 100644 index 36db4a9..0000000 --- a/internal/parse.go +++ /dev/null @@ -1,845 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "errors" - "fmt" - "io" - iofs "io/fs" - "strings" -) - -var ErrParserExceededMaxDepth = errors.New("exceeded max depth") - -// RuneType is the classification of a rune when parsing JSON input. -// A Parser, rather than grouping runes into tokens and classifying -// tokens, classifies runes directly. -type RuneType uint8 - -const ( - RuneTypeError RuneType = iota - - RuneTypeSpace // whitespace - - RuneTypeObjectBeg // '{' - RuneTypeObjectColon // ':' - RuneTypeObjectComma // ',' - RuneTypeObjectEnd // '}' - - RuneTypeArrayBeg // '[' - RuneTypeArrayComma // ',' - RuneTypeArrayEnd // ']' - - RuneTypeStringBeg // opening '"' - RuneTypeStringChar // normal character - RuneTypeStringEsc // backslash - RuneTypeStringEsc1 // single-char after a backslash - RuneTypeStringEscU // \uABCD : u - RuneTypeStringEscUA // \uABCD : A - RuneTypeStringEscUB // \uABCD : B - RuneTypeStringEscUC // \uABCD : C - RuneTypeStringEscUD // \uABCD : D - RuneTypeStringEnd // closing '"' - - RuneTypeNumberIntNeg - RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero - RuneTypeNumberIntDig - RuneTypeNumberFracDot - RuneTypeNumberFracDig - RuneTypeNumberExpE - RuneTypeNumberExpSign - RuneTypeNumberExpDig - - RuneTypeTrueT - RuneTypeTrueR - RuneTypeTrueU - RuneTypeTrueE - - RuneTypeFalseF - RuneTypeFalseA - RuneTypeFalseL - RuneTypeFalseS - RuneTypeFalseE - - RuneTypeNullN - RuneTypeNullU - RuneTypeNullL1 - RuneTypeNullL2 - - RuneTypeEOF - - // Not a real rune type, but used as a stack state. - runeTypeAny -) - -// GoString implements fmt.GoStringer. -// -//nolint:dupl // False positive due to similarly shaped AST. -func (t RuneType) GoString() string { - str, ok := map[RuneType]string{ - RuneTypeError: "RuneTypeError", - - RuneTypeSpace: "RuneTypeSpace", - - RuneTypeObjectBeg: "RuneTypeObjectBeg", - RuneTypeObjectColon: "RuneTypeObjectColon", - RuneTypeObjectComma: "RuneTypeObjectComma", - RuneTypeObjectEnd: "RuneTypeObjectEnd", - - RuneTypeArrayBeg: "RuneTypeArrayBeg", - RuneTypeArrayComma: "RuneTypeArrayComma", - RuneTypeArrayEnd: "RuneTypeArrayEnd", - - RuneTypeStringBeg: "RuneTypeStringBeg", - RuneTypeStringChar: "RuneTypeStringChar", - RuneTypeStringEsc: "RuneTypeStringEsc", - RuneTypeStringEsc1: "RuneTypeStringEsc1", - RuneTypeStringEscU: "RuneTypeStringEscU", - RuneTypeStringEscUA: "RuneTypeStringEscUA", - RuneTypeStringEscUB: "RuneTypeStringEscUB", - RuneTypeStringEscUC: "RuneTypeStringEscUC", - RuneTypeStringEscUD: "RuneTypeStringEscUD", - RuneTypeStringEnd: "RuneTypeStringEnd", - - RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", - RuneTypeNumberIntZero: "RuneTypeNumberIntZero", - RuneTypeNumberIntDig: "RuneTypeNumberIntDig", - RuneTypeNumberFracDot: "RuneTypeNumberFracDot", - RuneTypeNumberFracDig: "RuneTypeNumberFracDig", - RuneTypeNumberExpE: "RuneTypeNumberExpE", - RuneTypeNumberExpSign: "RuneTypeNumberExpSign", - RuneTypeNumberExpDig: "RuneTypeNumberExpDig", - - RuneTypeTrueT: "RuneTypeTrueT", - RuneTypeTrueR: "RuneTypeTrueR", - RuneTypeTrueU: "RuneTypeTrueU", - RuneTypeTrueE: "RuneTypeTrueE", - - RuneTypeFalseF: "RuneTypeFalseF", - RuneTypeFalseA: "RuneTypeFalseA", - RuneTypeFalseL: "RuneTypeFalseL", - RuneTypeFalseS: "RuneTypeFalseS", - RuneTypeFalseE: "RuneTypeFalseE", - - RuneTypeNullN: "RuneTypeNullN", - RuneTypeNullU: "RuneTypeNullU", - RuneTypeNullL1: "RuneTypeNullL1", - RuneTypeNullL2: "RuneTypeNullL2", - - RuneTypeEOF: "RuneTypeEOF", - - runeTypeAny: "runeTypeAny", - }[t] - if ok { - return str - } - return fmt.Sprintf("RuneType(%d)", t) -} - -// String implements fmt.Stringer. -// -//nolint:dupl // False positive due to similarly shaped AST. -func (t RuneType) String() string { - str, ok := map[RuneType]string{ - RuneTypeError: "x", - - RuneTypeSpace: " ", - - RuneTypeObjectBeg: "{", - RuneTypeObjectColon: ":", - RuneTypeObjectComma: "o", - RuneTypeObjectEnd: "}", - - RuneTypeArrayBeg: "[", - RuneTypeArrayComma: "a", - RuneTypeArrayEnd: "]", - - RuneTypeStringBeg: "\"", - RuneTypeStringChar: "c", - RuneTypeStringEsc: "\\", - RuneTypeStringEsc1: "b", - RuneTypeStringEscU: "u", - RuneTypeStringEscUA: "A", - RuneTypeStringEscUB: "B", - RuneTypeStringEscUC: "C", - RuneTypeStringEscUD: "D", - RuneTypeStringEnd: "ยป", - - RuneTypeNumberIntNeg: "-", - RuneTypeNumberIntZero: "0", - RuneTypeNumberIntDig: "1", - RuneTypeNumberFracDot: ".", - RuneTypeNumberFracDig: "2", - RuneTypeNumberExpE: "e", - RuneTypeNumberExpSign: "+", - RuneTypeNumberExpDig: "3", - - RuneTypeTrueT: "๐•ฅ", // double-struck - RuneTypeTrueR: "๐•ฃ", - RuneTypeTrueU: "๐•ฆ", - RuneTypeTrueE: "๐•–", - - RuneTypeFalseF: "๐”ฃ", // fraktur - RuneTypeFalseA: "๐”ž", - RuneTypeFalseL: "๐”ฉ", - RuneTypeFalseS: "๐”ฐ", - RuneTypeFalseE: "๐”ข", - - RuneTypeNullN: "โ“", // circled - RuneTypeNullU: "โ“ค", - RuneTypeNullL1: "โ“›", - RuneTypeNullL2: "โ“", // +uppercase - - RuneTypeEOF: "$", - - runeTypeAny: "?", - }[t] - if ok { - return str - } - return fmt.Sprintf("<%d>", t) -} - -func (t RuneType) JSONType() string { - return map[RuneType]string{ - RuneTypeObjectBeg: "object", - RuneTypeArrayBeg: "array", - RuneTypeStringBeg: "string", - RuneTypeNumberIntNeg: "number", - RuneTypeNumberIntZero: "number", - RuneTypeNumberIntDig: "number", - RuneTypeTrueT: "true", - RuneTypeFalseF: "false", - RuneTypeNullN: "null", - RuneTypeEOF: "eof", - }[t] -} - -// IsNumber returns whether the RuneType is one of the -// RuneTypeNumberXXX values. -func (t RuneType) IsNumber() bool { - return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig -} - -// Parser is the low-level JSON parser that powers both *Decoder and -// *ReEncoder. -type Parser struct { - // Setting MaxError to a value greater than 0 causes - // HandleRune to return ErrParserExceededMaxDepth if - // objects/arrays become nested more deeply than this. - MaxDepth int - - initialized bool - - err error - closed bool - - // We reuse RuneTypes to store the stack. The base idea is: - // stack items are "the most recently read stack-relevant - // RuneType". - // - // The stack starts out with the special pseudo-RuneType - // `runeTypeAny` that means we're willing to accept any - // element type; an empty stack means that we have reached the - // end of the top-level element and should accept no more - // input except for whitespace. - // - // The "normal" stack-relevant RuneTypes are: - // - // "\uABC for strings - // -01.2e+3 for numbers - // ๐•ฅ๐•ฃ๐•ฆ for "true" - // ๐”ฃ๐”ž๐”ฉ๐”ฐ for "false" - // โ“โ“คโ“› for "null" - // - // Objects and arrays break the "most recently read RuneType" - // rule; they need some special assignments: - // - // { object: waiting for key to start or '}' - // ยป object: reading key / waiting for colon - // o object: reading value / waiting for ',' or '}' - // - // [ array: waiting for item to start or ']' - // a array: reading item / waiting for ',' or ']' - // - // Within each element type, the stack item is replaced, not pushed. - // - // (Keep each of these examples in-sync with parse_test.go.) - // - // For example, given the input string - // - // {"x":"y","a":"b"} - // - // The stack would be - // - // stack processed - // ? - // { { - // ยป" {" - // ยป" {"x - // ยป {"x" - // o? {"x": - // o" {"x":" - // o" {"x":"y - // o {"x":"y" - // { {"x":"y", - // ยป" {"x":"y"," - // ยป" {"x":"y","a - // ยป {"x":"y","a" - // o? {"x":"y","a": - // o" {"x":"y","a":" - // o" {"x":"y","a":"b - // o {"x":"y","a":"b" - // {"x":"y","a":"b"} - // - // Or, given the input string - // - // ["x","y"] - // - // The stack would be - // - // stack processed - // ? - // [ [ - // a" [" - // a" ["x - // a ["x" - // a? ["x", - // a" ["x"," - // a" ["x","y - // a ["x","y" - // ["x","y"] - stack []RuneType - - barriers []barrier -} - -type barrier struct { - closed bool - stack []RuneType -} - -func (par *Parser) init() { - if !par.initialized { - par.initialized = true - par.pushState(runeTypeAny) - } -} - -func (par *Parser) pushState(state RuneType) RuneType { - par.stack = append(par.stack, state) - return state -} - -func (par *Parser) replaceState(state RuneType) RuneType { - par.stack[len(par.stack)-1] = state - return state -} - -func (par *Parser) popState() { - par.stack = par.stack[:len(par.stack)-1] -} - -func (par *Parser) stackString() string { - par.init() - var buf strings.Builder - for _, s := range par.stack { - buf.WriteString(s.String()) - } - return buf.String() -} - -func (par *Parser) depth() int { - n := len(par.stack) - for _, barrier := range par.barriers { - n += len(barrier.stack) - } - return n -} - -func (par *Parser) StackIsEmpty() bool { - if len(par.barriers) > 0 { - return false - } - if len(par.stack) == 0 { - return true - } - return len(par.stack) == 1 && par.stack[0] == runeTypeAny -} - -func (par *Parser) StackSize() int { - return len(par.stack) -} - -// Reset all Parser state. -func (par *Parser) Reset() { - *par = Parser{ - MaxDepth: par.MaxDepth, - } -} - -// PushReadBarrier causes the parser to expect EOF once the end of the -// element that is started by the current top-of-stack is reached, -// until this is un-done with PopBarrier. It essentially turns the -// parser in to a sub-parser. -// -// PushReadBarrier may only be called at the beginning of an element, -// whether that be -// -// - runeTypeAny -// - RuneTypeObjectBeg -// - RuneTypeArrayBeg -// - RuneTypeStringBeg -// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig -// - RuneTypeTrueT -// - RuneTypeFalseF -// - RuneTypeNullN -func (par *Parser) PushReadBarrier() { - // Sanity checking. - par.init() - if len(par.stack) == 0 { - panic(errors.New("illegal PushReadBarrier call: empty stack")) - } - curState := par.stack[len(par.stack)-1] - switch curState { - case runeTypeAny, - RuneTypeObjectBeg, - RuneTypeArrayBeg, - RuneTypeStringBeg, - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, - RuneTypeTrueT, - RuneTypeFalseF, - RuneTypeNullN: - // OK - default: - panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) - } - // Actually push. - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack[:len(par.stack)-1], - }) - par.stack = []RuneType{curState} -} - -// PushWriteBarrier causes the parser to expect EOF once the end of -// the about-to-start element is reached, until this is un-done with -// PopBarrier. It essentially turns the parser in to a sub-parser. -// -// PushWriteBarrier may only be called at the places where an element -// of any type may start: -// -// - runeTypeAny for top-level and object-value elements -// - RuneTypeArrayBeg for array-item elements -// -// PushWriteBarrier signals intent to write an element; if it is -// called in a place where an element is optional (at the beginning of -// an array), it becomes a syntax error to not write the element. -func (par *Parser) PushWriteBarrier() { - par.init() - if len(par.stack) == 0 { - panic(errors.New("illegal PushWriteBarrier call: empty stack")) - } - switch par.stack[len(par.stack)-1] { - case runeTypeAny: - par.popState() - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, - }) - par.stack = []RuneType{runeTypeAny} - case RuneTypeArrayBeg: - par.replaceState(RuneTypeArrayComma) - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, - }) - par.stack = []RuneType{runeTypeAny} - default: - panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) - } -} - -// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. -func (par *Parser) PopBarrier() { - if len(par.barriers) == 0 { - panic(errors.New("illegal PopBarrier call: empty barrier stack")) - } - barrier := par.barriers[len(par.barriers)-1] - par.barriers = par.barriers[:len(par.barriers)-1] - par.closed = barrier.closed - par.stack = append(barrier.stack, par.stack...) -} - -// HandleEOF feeds EOF to the Parser. The returned RuneType is either -// RuneTypeEOF or RuneTypeError. -// -// An error is returned if and only if the RuneType is RuneTypeError. -// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. -// -// RuneTypeEOF indicates that a complete JSON document has been read. -func (par *Parser) HandleEOF() (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - defer func() { - par.closed = true - }() - if par.err != nil { - return RuneTypeError, par.err - } - par.init() - switch len(par.stack) { - case 0: - return RuneTypeEOF, nil - case 1: - switch { - case par.stack[0].IsNumber(): - if _, err := par.HandleRune('\n'); err == nil { - return RuneTypeEOF, nil - } - case par.stack[0] == runeTypeAny: - par.err = io.EOF - return RuneTypeError, par.err - } - fallthrough - default: - par.err = io.ErrUnexpectedEOF - return RuneTypeError, par.err - } -} - -// HandleRune feeds a Unicode rune to the Parser. -// -// An error is returned if and only if the RuneType is RuneTypeError. -// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. -// -// RuneTypeEOF indicates that the rune cannot be appended to the JSON -// document; a new JSON document must be started in order to process -// that rune. -func (par *Parser) HandleRune(c rune) (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - if par.err != nil { - return RuneTypeError, par.err - } - par.init() - if len(par.stack) == 0 { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - return RuneTypeEOF, nil - } - } - switch par.stack[len(par.stack)-1] { - // any ///////////////////////////////////////////////////////////////////////////////////// - case runeTypeAny: - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '{': - if par.MaxDepth > 0 && par.depth() > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeObjectBeg), nil - case '[': - if par.MaxDepth > 0 && par.depth() > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeArrayBeg), nil - case '"': - return par.replaceState(RuneTypeStringBeg), nil - case '-': - return par.replaceState(RuneTypeNumberIntNeg), nil - case '0': - return par.replaceState(RuneTypeNumberIntZero), nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case 't': - return par.replaceState(RuneTypeTrueT), nil - case 'f': - return par.replaceState(RuneTypeFalseF), nil - case 'n': - return par.replaceState(RuneTypeNullN), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c) - } - // object ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeObjectBeg: // waiting for key to start or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '"': - par.replaceState(RuneTypeStringEnd) - return par.pushState(RuneTypeStringBeg), nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) - } - case RuneTypeStringEnd: // waiting for ':' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ':': - par.replaceState(RuneTypeObjectComma) - par.pushState(runeTypeAny) - return RuneTypeObjectColon, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) - } - case RuneTypeObjectComma: // waiting for ',' or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(RuneTypeObjectBeg) - return RuneTypeObjectComma, nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c) - } - // array /////////////////////////////////////////////////////////////////////////////////// - case RuneTypeArrayBeg: // waiting for item to start or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - par.replaceState(RuneTypeArrayComma) - par.pushState(runeTypeAny) - return par.HandleRune(c) - } - case RuneTypeArrayComma: // waiting for ',' or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.pushState(runeTypeAny) - return RuneTypeArrayComma, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after array element", c) - } - // string ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeStringBeg: // waiting for char or '"' - switch { - case c == '\\': - return par.replaceState(RuneTypeStringEsc), nil - case c == '"': - par.popState() - return RuneTypeStringEnd, nil - case 0x0020 <= c && c <= 0x10FFFF: - return RuneTypeStringChar, nil - default: - return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) - } - case RuneTypeStringEsc: // waiting for escape char - switch c { - case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEsc1, nil - case 'u': - return par.replaceState(RuneTypeStringEscU), nil - default: - return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) - } - case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - // number ////////////////////////////////////////////////////////////////////////////////// - // - // Here's a flattened drawing of the syntax diagram from www.json.org : - // - // [------------ integer ----------][-- fraction ---][-------- exponent -------] - // >โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> - // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ - // โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ - // โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ - // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ - // โ”‚ โ”‚ - // โ•ฐโ”€"+"โ”€โ•ฏ - // - // Now here it is slightly redrawn, and with each distinct state our - // parser can be in marked with a single-capital-letter: - // - // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] - // >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> - // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ - // โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ - // โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ - // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ - // โ”‚ โ”‚ - // โ•ฐโ”€"+"โ”€โ•ฏ - // - // You may notice that each of these states may be uniquely identified - // by the last-read RuneType: - // - // A = (nothing yet) - // B = IntNeg - // C = IntZero - // D = IntDig - // E = FracDot - // F = FracDig - // G = ExpE - // H = ExpSign - // I = ExpDig - // - // The 'A' state is part of the runeTypeAny case above, and - // the remainder follow: - case RuneTypeNumberIntNeg: // B - switch c { - case '0': - return par.replaceState(RuneTypeNumberIntZero), nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberIntZero: // C - switch c { - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberIntDig: // D - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberFracDot: // E - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberFracDig: // F - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberExpE: // G - switch c { - case '-', '+': - return par.replaceState(RuneTypeNumberExpSign), nil - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpSign: // H - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpDig: // I - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - par.popState() - return par.HandleRune(c) - } - // literals //////////////////////////////////////////////////////////////////////////////// - // true - case RuneTypeTrueT: - return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) - case RuneTypeTrueR: - return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) - case RuneTypeTrueU: - return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) - // false - case RuneTypeFalseF: - return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) - case RuneTypeFalseA: - return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) - case RuneTypeFalseL: - return par.expectRune(c, 's', RuneTypeFalseS, "false", false) - case RuneTypeFalseS: - return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) - // null - case RuneTypeNullN: - return par.expectRune(c, 'u', RuneTypeNullU, "null", false) - case RuneTypeNullU: - return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) - case RuneTypeNullL1: - return par.expectRune(c, 'l', RuneTypeNullL2, "null", true) - default: - panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) - } -} - -func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { - if c != exp { - return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp) - } - if pop { - par.popState() - return typ, nil - } else { - return par.replaceState(typ), nil - } -} -- cgit v1.2.3-2-g168b