// Copyright (C) 2022 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "fmt" "io" iofs "io/fs" ) type RuneType uint8 const ( RuneTypeError = RuneType(iota) RuneTypeSpace // whitespace RuneTypeObjectBeg // '{' RuneTypeObjectColon // ':' RuneTypeObjectComma // ',' RuneTypeObjectEnd // '}' RuneTypeArrayBeg // '[' RuneTypeArrayComma // ',' RuneTypeArrayEnd // ']' RuneTypeStringBeg // opening '"' RuneTypeStringChar // normal character RuneTypeStringEsc // backslash RuneTypeStringEsc1 // single-char after a backslash RuneTypeStringEscU // \uABCD : u RuneTypeStringEscUA // \uABCD : A RuneTypeStringEscUB // \uABCD : B RuneTypeStringEscUC // \uABCD : C RuneTypeStringEscUD // \uABCD : D RuneTypeStringEnd // closing '"' RuneTypeNumberInt // 0|[1-9][0-9]* RuneTypeNumberFrac // \.[0-9]* RuneTypeNumberExp // [eE][-+]?[0-9] RuneTypeTrueT RuneTypeTrueR RuneTypeTrueU RuneTypeTrueE RuneTypeFalseF RuneTypeFalseA RuneTypeFalseL RuneTypeFalseS RuneTypeFalseE RuneTypeNullN RuneTypeNullU RuneTypeNullL1 RuneTypeNullL2 ) type parseState func(rune) (RuneType, error) type parser struct { err error closed bool stack []parseState stack0IsNumber bool // whether stack[0] is a number-state; affects how EOF is handled } // "public" API //////////////////////////////////////////////////////////////////////////////////// func (par *parser) HandleRune(c rune) (RuneType, error) { if par.closed { return RuneTypeError, iofs.ErrClosed } if par.err != nil { return RuneTypeError, par.err } return par.state(c) } func (par *parser) HandleEOF() error { if par.closed { return iofs.ErrClosed } if par.err == nil { switch len(par.stack) { case 0: par.err = nil case 1: if par.stack0IsNumber { _, par.err = par.state('\n') } fallthrough default: par.err = io.ErrUnexpectedEOF } } par.closed = true return par.err } // state helpers /////////////////////////////////////////////////////////////////////////////////// func (par *parser) pushState(state parseState, isNumber bool) { if len(par.stack) == 0 { par.stack0IsNumber = isNumber } par.stack = append(par.stack, state) } func (par *parser) replaceState(state parseState, isNumber bool) { if len(par.stack) == 1 { par.stack0IsNumber = isNumber } par.stack[len(par.stack)-1] = state } func (par *parser) popState() { if len(par.stack) == 1 { par.stack0IsNumber = false } par.stack = par.stack[:len(par.stack)-1] } func (par *parser) state(c rune) (RuneType, error) { if len(par.stack) == 0 { par.pushState(par.stateAny, false) } return par.stack[len(par.stack)-1](c) } // state: any ////////////////////////////////////////////////////////////////////////////////////// func (par *parser) stateAny(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '{': par.replaceState(par.stateInObject, false) return RuneTypeObjectBeg, nil case '[': par.replaceState(par.stateInArray, false) return RuneTypeArrayBeg, nil case '"': par.replaceState(par.stateInString, false) return RuneTypeStringBeg, nil case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': par.replaceState(par.stateNumberA, true) return par.state(c) case 't': par.replaceState(par.stateTrueT, false) return RuneTypeTrueT, nil case 'f': par.replaceState(par.stateFalseF, false) return RuneTypeFalseF, nil case 'n': par.replaceState(par.stateNullN, false) return RuneTypeNullN, nil default: return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c) } } // state: object /////////////////////////////////////////////////////////////////////////////////// func (par *parser) stateInObject(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '"': par.replaceState(par.stateAfterK, false) par.pushState(par.stateInString, false) return RuneTypeStringBeg, nil case '}': par.popState() return RuneTypeObjectEnd, nil default: return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) } } func (par *parser) stateAfterK(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ':': par.replaceState(par.stateAfterV, false) par.pushState(par.stateAny, false) return RuneTypeObjectColon, nil default: return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) } } func (par *parser) stateAfterV(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ',': par.replaceState(par.stateInObject, false) return RuneTypeObjectComma, nil case '}': par.popState() return RuneTypeObjectEnd, nil default: return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) } } // state: array //////////////////////////////////////////////////////////////////////////////////// func (par *parser) stateInArray(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ']': par.popState() return RuneTypeArrayEnd, nil default: par.replaceState(par.stateAfterItem, false) par.pushState(par.stateAny, false) return par.state(c) } } func (par *parser) stateAfterItem(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ',': par.replaceState(par.stateInArray, false) return RuneTypeArrayComma, nil case ']': par.popState() return RuneTypeArrayEnd, nil default: return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c) } } // state: string /////////////////////////////////////////////////////////////////////////////////// func (par *parser) stateInString(c rune) (RuneType, error) { switch { case c == '\\': par.replaceState(par.stateInEsc, false) return RuneTypeStringEsc, nil case c == '"': par.popState() return RuneTypeStringEnd, nil case 0x0020 <= c && c <= 0x10FFFF: return RuneTypeStringChar, nil default: return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) } } func (par *parser) stateInEsc(c rune) (RuneType, error) { switch c { case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': par.replaceState(par.stateInString, false) return RuneTypeStringEsc1, nil case 'u': par.replaceState(par.stateInEscU, false) return RuneTypeStringEscU, nil default: return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } } func (par *parser) _stateInEscU(c rune, typ RuneType, nxt parseState) (RuneType, error) { switch { case ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'): par.replaceState(nxt, false) return typ, nil default: return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } } func (par *parser) stateInEscU(c rune) (RuneType, error) { return par._stateInEscU(c, RuneTypeStringEscUA, par.stateInEscUA) } func (par *parser) stateInEscUA(c rune) (RuneType, error) { return par._stateInEscU(c, RuneTypeStringEscUB, par.stateInEscUB) } func (par *parser) stateInEscUB(c rune) (RuneType, error) { return par._stateInEscU(c, RuneTypeStringEscUC, par.stateInEscUC) } func (par *parser) stateInEscUC(c rune) (RuneType, error) { return par._stateInEscU(c, RuneTypeStringEscUD, par.stateInString) } // state: number /////////////////////////////////////////////////////////////////////////////////// // Here's a flattened drawing of the syntax diagram from www.json.org : // // [------------ integer ----------][-- fraction ---][-------- exponent -------] // >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─> // │ │ │ │ │ │ │ │ │ // ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯ ╰─"."─╭digit╮─╯ ╰─"e"─╭─╮─────╭─╭digit╮─╯ // ╰──<──╯ ╰──<──╯ │ │ │ │ ╰──<──╯ // ╰─"E"─╯ ╰─"-"─╯ // │ │ // ╰─"+"─╯ // // Now here it is slightly redrawn, and with each distinct state our // parser can be in marked with a single-capital-letter: // // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─> // │ │ │ │ │ │ │ │ // ╰─"-"─B─╯ ╰─digit 1-9─╭─D─╯─digit╮ ╰─"."─E─digit──╭─F─╯─digit╮ ╰─"e"─╭─G─╮─────╭─╭digit─H─╯ // ╰────<─────╯ ╰────<─────╯ │ │ │ │ ╰────<───╯ // ╰─"E"─╯ ╰─"-"─╯ // │ │ // ╰─"+"─╯ // // Which state we're at is the 'X' in 'stateNumberX'. // number: integer-part //////////////////////////////////////////////////////// func (par *parser) stateNumberA(c rune) (RuneType, error) { // start switch c { case '-': par.replaceState(par.stateNumberB, true) return RuneTypeNumberInt, nil case '0': par.replaceState(par.stateNumberC, true) return RuneTypeNumberInt, nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': par.replaceState(par.stateNumberD, true) return RuneTypeNumberInt, nil default: return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } } func (par *parser) stateNumberB(c rune) (RuneType, error) { // got a leading "-" switch c { case '0': par.replaceState(par.stateNumberC, true) return RuneTypeNumberInt, nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': par.replaceState(par.stateNumberD, true) return RuneTypeNumberInt, nil default: return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } } func (par *parser) stateNumberC(c rune) (RuneType, error) { // ready for the fraction or exponent part to start switch c { case '.': par.replaceState(par.stateNumberE, true) return RuneTypeNumberFrac, nil case 'e', 'E': par.replaceState(par.stateNumberG, true) return RuneTypeNumberExp, nil default: par.popState() return par.state(c) } } func (par *parser) stateNumberD(c rune) (RuneType, error) { // in the integer part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return RuneTypeNumberInt, nil case '.': par.replaceState(par.stateNumberE, true) return RuneTypeNumberFrac, nil case 'e', 'E': par.replaceState(par.stateNumberG, true) return RuneTypeNumberExp, nil default: par.popState() return par.state(c) } } // number: fraction-part /////////////////////////////////////////////////////// func (par *parser) stateNumberE(c rune) (RuneType, error) { // got a ".", ready to read a number for the fraction part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': par.replaceState(par.stateNumberF, true) return RuneTypeNumberFrac, nil default: return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } } func (par *parser) stateNumberF(c rune) (RuneType, error) { // in the fraction part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return RuneTypeNumberFrac, nil case 'e', 'E': par.replaceState(par.stateNumberG, true) return RuneTypeNumberExp, nil default: par.popState() return par.state(c) } } // number: exponent-part /////////////////////////////////////////////////////// func (par *parser) stateNumberG(c rune) (RuneType, error) { // got a leading "e" switch c { case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': par.replaceState(par.stateNumberH, true) return RuneTypeNumberExp, nil default: return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) } } func (par *parser) stateNumberH(c rune) (RuneType, error) { // in the exponent's number part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return RuneTypeNumberExp, nil default: par.popState() return par.state(c) } } // state: literals ///////////////////////////////////////////////////////////////////////////////// func (par *parser) l(c rune, full string, exp rune, typ RuneType, nxt parseState) (RuneType, error) { if c != exp { return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", full, c) } if nxt == nil { par.popState() } else { par.replaceState(nxt, false) } return typ, nil } func (par *parser) stateTrueT(c rune) (RuneType, error) { return par.l(c, "true", 'r', RuneTypeTrueR, par.stateTrueR) } func (par *parser) stateTrueR(c rune) (RuneType, error) { return par.l(c, "true", 'u', RuneTypeTrueU, par.stateTrueU) } func (par *parser) stateTrueU(c rune) (RuneType, error) { return par.l(c, "true", 'e', RuneTypeTrueR, nil) } func (par *parser) stateFalseF(c rune) (RuneType, error) { return par.l(c, "false", 'a', RuneTypeFalseA, par.stateFalseA) } func (par *parser) stateFalseA(c rune) (RuneType, error) { return par.l(c, "false", 'l', RuneTypeFalseL, par.stateFalseL) } func (par *parser) stateFalseL(c rune) (RuneType, error) { return par.l(c, "false", 's', RuneTypeFalseS, par.stateFalseS) } func (par *parser) stateFalseS(c rune) (RuneType, error) { return par.l(c, "false", 'e', RuneTypeFalseE, nil) } func (par *parser) stateNullN(c rune) (RuneType, error) { return par.l(c, "null", 'u', RuneTypeNullU, par.stateNullU) } func (par *parser) stateNullU(c rune) (RuneType, error) { return par.l(c, "null", 'l', RuneTypeNullL1, par.stateNullL) } func (par *parser) stateNullL(c rune) (RuneType, error) { return par.l(c, "null", 'l', RuneTypeNullL2, nil) }