From 7ce002e865971eb5425230a8a1dec7d936efa1d0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 14 Aug 2022 12:51:09 -0600 Subject: parse: Figure out a good end-of-number system, improve comments --- parse.go | 214 +++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 140 insertions(+), 74 deletions(-) diff --git a/parse.go b/parse.go index d4b55eb..58deb0b 100644 --- a/parse.go +++ b/parse.go @@ -62,6 +62,8 @@ const ( RuneTypeNullU RuneTypeNullL1 RuneTypeNullL2 + + RuneTypeEOF ) func (t RuneType) String() string { @@ -114,6 +116,8 @@ func (t RuneType) String() string { RuneTypeNullU: "ⓤ", RuneTypeNullL1: "ⓛ", RuneTypeNullL2: "Ⓛ", // +uppercase + + RuneTypeEOF: "$", }[t] if ok { return str @@ -125,80 +129,67 @@ func (t RuneType) IsNumber() bool { return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig } -// { waiting for key to start or '}' -// ” reading key / waiting for colon -// : waiting for value to start -// , reading value / waiting for ',' or '}' -// -// {"x":"y","a":"b"} -// -// { { -// ”“ {" -// ”“ {"x -// ” {"x" -// : {"x": -// o“ {"x":" -// o“ {"x":"y -// o {"x":"y" -// { {"x":"y", -// ”“ {"x":"y"," -// ”“ {"x":"y","a -// ” {"x":"y","a" -// : {"x":"y","a": -// o“ {"x":" -// o“ {"x":"y -// o {"x":"y" -// -// [ waiting for item to start or ']' -// a reading item / waiting for ',' or ']' - -type parseState func(rune) (RuneType, error) - type Parser struct { + initialized bool + err error closed bool - bailAfterCurrent bool // bad hack - + // We reuse RuneTypes to store the stack. The base idea is + // that, stack items are "the most recently read + // stack-relevant RuneType". + // + // We treat RuneTypeError as a wildcard. 
+ // + // The "normal" stack-relevant RuneTypes are: + // + // “\uABC for strings + // -01.2e+3 for numbers + // 𝕥𝕣𝕦 for "true" + // 𝔣𝔞𝔩𝔰 for "false" + // ⓝⓤⓛ for "null" + // + // Objects and arrays break the "most recently read RuneType" + // rule; they need some special assignments: + // + // { object: waiting for key to start or '}' + // ” object: reading key / waiting for colon + // : object: waiting for value to start + // o object: reading value / waiting for ',' or '}' + // + // [ array: waiting for item to start or ']' + // a array: reading item / waiting for ',' or ']' + // + // Within each element type, the stack item is replaced, not pushed. + // + // For example, given the input string + // + // {"x":"y","a":"b"} + // + // The stack would be + // + // stack processed + // x + // { { + // ”“ {" + // ”“ {"x + // ” {"x" + // : {"x": + // o“ {"x":" + // o“ {"x":"y + // o {"x":"y" + // { {"x":"y", + // ”“ {"x":"y"," + // ”“ {"x":"y","a + // ” {"x":"y","a" + // : {"x":"y","a": + // o“ {"x":"y","a":" + // o“ {"x":"y","a":"b + // o {"x":"y","a":"b" + // {"x":"y","a":"b"} stack []RuneType } -// public API ////////////////////////////////////////////////////////////////////////////////////// - -func (par *Parser) HandleRune(c rune) (typ RuneType, err error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - if par.err != nil { - return RuneTypeError, par.err - } - return par.handleRune(c) -} - -func (par *Parser) HandleEOF() error { - if par.closed { - return iofs.ErrClosed - } - if par.err == nil { - switch len(par.stack) { - case 0: - par.err = nil - case 1: - if par.stack[0].IsNumber() { - _, par.err = par.handleRune('\n') - break - } - fallthrough - default: - par.err = io.ErrUnexpectedEOF - } - } - par.closed = true - return par.err -} - -// internal //////////////////////////////////////////////////////////////////////////////////////// - func (par *Parser) pushState(state RuneType) RuneType { par.stack = append(par.stack, state) return state 
@@ -219,10 +210,85 @@ func (par *Parser) stackString() string { return buf.String() } -func (par *Parser) handleRune(c rune) (RuneType, error) { - if len(par.stack) == 0 { +// Reset all Parser state. +func (par *Parser) Reset() { + *par = Parser{} +} + +// HandleEOF feeds EOF to the Parser. The returned RuneType is either +// RuneTypeEOF or RuneTypeError. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. +// +// RuneTypeEOF indicates that a complete JSON document has been read. +func (par *Parser) HandleEOF() (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + defer func() { + par.closed = true + }() + if par.err != nil { + return RuneTypeError, par.err + } + if !par.initialized { + par.initialized = true par.pushState(RuneTypeError) } + switch len(par.stack) { + case 0: + return RuneTypeEOF, nil + case 1: + if par.stack[0].IsNumber() { + if _, err := par.HandleRune('\n'); err == nil { + return RuneTypeEOF, nil + } + } + fallthrough + default: + par.err = io.ErrUnexpectedEOF + return RuneTypeError, par.err + } +} + +// HandleRune feeds a Unicode rune to the Parser. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. +// +// RuneTypeEOF indicates that the rune cannot be appended to the JSON +// document; a new JSON document must be started in order to process +// that rune. 
+func (par *Parser) HandleRune(c rune) (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + if par.err != nil { + return RuneTypeError, par.err + } + if !par.initialized { + par.initialized = true + par.pushState(RuneTypeError) + } + if len(par.stack) == 0 { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + default: + return RuneTypeEOF, nil + } + } switch par.stack[len(par.stack)-1] { // any ///////////////////////////////////////////////////////////////////////////////////// case RuneTypeError: @@ -299,7 +365,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) { default: par.replaceState(RuneTypeArrayComma) par.pushState(RuneTypeError) - return par.handleRune(c) + return par.HandleRune(c) } case RuneTypeArrayComma: // waiting for ',' or ']' switch c { @@ -419,7 +485,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.handleRune(c) + return par.HandleRune(c) } case RuneTypeNumberIntDig: // D switch c { @@ -431,7 +497,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.handleRune(c) + return par.HandleRune(c) } case RuneTypeNumberFracDot: // E switch c { @@ -448,7 +514,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.handleRune(c) + return par.HandleRune(c) } case RuneTypeNumberExpE: // G switch c { @@ -472,7 +538,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpDig), nil default: par.popState() - return par.handleRune(c) + return par.HandleRune(c) } // literals //////////////////////////////////////////////////////////////////////////////// // true -- cgit v1.2.3-2-g168b