summaryrefslogtreecommitdiff
path: root/parse.go
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2022-08-14 12:51:09 -0600
committer Luke Shumaker <lukeshu@lukeshu.com> 2022-08-14 13:45:02 -0600
commit 7ce002e865971eb5425230a8a1dec7d936efa1d0 (patch)
tree 948422b758962d95d9d886d60be2a22250cbb4d8 /parse.go
parent 4b00a61c33d6a448c59c5509c0a408f527308c8b (diff)
parse: Figure out a good end-of-number system, improve comments
Diffstat (limited to 'parse.go')
-rw-r--r-- parse.go 214
1 files changed, 140 insertions, 74 deletions
diff --git a/parse.go b/parse.go
index d4b55eb..58deb0b 100644
--- a/parse.go
+++ b/parse.go
@@ -62,6 +62,8 @@ const (
RuneTypeNullU
RuneTypeNullL1
RuneTypeNullL2
+
+ RuneTypeEOF
)
func (t RuneType) String() string {
@@ -114,6 +116,8 @@ func (t RuneType) String() string {
RuneTypeNullU: "ⓤ",
RuneTypeNullL1: "ⓛ",
RuneTypeNullL2: "Ⓛ", // +uppercase
+
+ RuneTypeEOF: "$",
}[t]
if ok {
return str
@@ -125,80 +129,67 @@ func (t RuneType) IsNumber() bool {
return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig
}
-// { waiting for key to start or '}'
-// ” reading key / waiting for colon
-// : waiting for value to start
-// , reading value / waiting for ',' or '}'
-//
-// {"x":"y","a":"b"}
-//
-// { {
-// ”“ {"
-// ”“ {"x
-// ” {"x"
-// : {"x":
-// o“ {"x":"
-// o“ {"x":"y
-// o {"x":"y"
-// { {"x":"y",
-// ”“ {"x":"y","
-// ”“ {"x":"y","a
-// ” {"x":"y","a"
-// : {"x":"y","a":
-// o“ {"x":"
-// o“ {"x":"y
-// o {"x":"y"
-//
-// [ waiting for item to start or ']'
-// a reading item / waiting for ',' or ']'
-
-type parseState func(rune) (RuneType, error)
-
type Parser struct {
+ initialized bool
+
err error
closed bool
- bailAfterCurrent bool // bad hack
-
+ // We reuse RuneTypes to store the stack. The basic idea is
+ // that each stack item is "the most recently read
+ // stack-relevant RuneType".
+ //
+ // We treat RuneTypeError as a wildcard.
+ //
+ // The "normal" stack-relevant RuneTypes are:
+ //
+ // “\uABC for strings
+ // -01.2e+3 for numbers
+ // 𝕥𝕣𝕦 for "true"
+ // 𝔣𝔞𝔩𝔰 for "false"
+ // ⓝⓤⓛ for "null"
+ //
+ // Objects and arrays break the "most recently read RuneType"
+ // rule; they need some special assignments:
+ //
+ // { object: waiting for key to start or '}'
+ // ” object: reading key / waiting for colon
+ // : object: waiting for value to start
+ // o object: reading value / waiting for ',' or '}'
+ //
+ // [ array: waiting for item to start or ']'
+ // a array: reading item / waiting for ',' or ']'
+ //
+ // Within each element type, the stack item is replaced, not pushed.
+ //
+ // For example, given the input string
+ //
+ // {"x":"y","a":"b"}
+ //
+ // The stack would be
+ //
+ // stack processed
+ // x
+ // { {
+ // ”“ {"
+ // ”“ {"x
+ // ” {"x"
+ // : {"x":
+ // o“ {"x":"
+ // o“ {"x":"y
+ // o {"x":"y"
+ // { {"x":"y",
+ // ”“ {"x":"y","
+ // ”“ {"x":"y","a
+ // ” {"x":"y","a"
+ // : {"x":"y","a":
+ // o“ {"x":"y","a":"
+ // o“ {"x":"y","a":"b
+ // o {"x":"y","a":"b"
+ // {"x":"y","a":"b"}
stack []RuneType
}
-// public API //////////////////////////////////////////////////////////////////////////////////////
-
-func (par *Parser) HandleRune(c rune) (typ RuneType, err error) {
- if par.closed {
- return RuneTypeError, iofs.ErrClosed
- }
- if par.err != nil {
- return RuneTypeError, par.err
- }
- return par.handleRune(c)
-}
-
-func (par *Parser) HandleEOF() error {
- if par.closed {
- return iofs.ErrClosed
- }
- if par.err == nil {
- switch len(par.stack) {
- case 0:
- par.err = nil
- case 1:
- if par.stack[0].IsNumber() {
- _, par.err = par.handleRune('\n')
- break
- }
- fallthrough
- default:
- par.err = io.ErrUnexpectedEOF
- }
- }
- par.closed = true
- return par.err
-}
-
-// internal ////////////////////////////////////////////////////////////////////////////////////////
-
func (par *Parser) pushState(state RuneType) RuneType {
par.stack = append(par.stack, state)
return state
@@ -219,10 +210,85 @@ func (par *Parser) stackString() string {
return buf.String()
}
-func (par *Parser) handleRune(c rune) (RuneType, error) {
- if len(par.stack) == 0 {
+// Reset all Parser state.
+func (par *Parser) Reset() {
+ *par = Parser{}
+}
+
+// HandleEOF feeds EOF to the Parser. The returned RuneType is either
+// RuneTypeEOF or RuneTypeError.
+//
+// An error is returned if and only if the RuneType is RuneTypeError.
+// Returns io/fs.ErrClosed if .HandleEOF() has previously been called
+// (and .Reset() has not been called since).
+//
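+// Once RuneTypeError has been returned, RuneTypeError will keep
+// being returned from both .HandleRune(c) and .HandleEOF() until
+// .Reset() is called. Note that calling .HandleEOF() closes the
+// Parser: even if this call returns RuneTypeEOF, later calls return
+// RuneTypeError with io/fs.ErrClosed until .Reset() is called.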
+//
+// RuneTypeEOF indicates that a complete JSON document has been read.
+func (par *Parser) HandleEOF() (RuneType, error) {
+ if par.closed {
+ return RuneTypeError, iofs.ErrClosed
+ }
+ defer func() {
+ par.closed = true
+ }()
+ if par.err != nil {
+ return RuneTypeError, par.err
+ }
+ if !par.initialized {
+ par.initialized = true
par.pushState(RuneTypeError)
}
+ switch len(par.stack) {
+ case 0:
+ return RuneTypeEOF, nil
+ case 1:
+ if par.stack[0].IsNumber() {
+ if _, err := par.HandleRune('\n'); err == nil {
+ return RuneTypeEOF, nil
+ }
+ }
+ fallthrough
+ default:
+ par.err = io.ErrUnexpectedEOF
+ return RuneTypeError, par.err
+ }
+}
+
+// HandleRune feeds a Unicode rune to the Parser.
+//
+// An error is returned if and only if the RuneType is RuneTypeError.
+// Returns io/fs.ErrClosed if .HandleEOF() has previously been called
+// (and .Reset() has not been called since).
+//
+// Once RuneTypeError or RuneTypeEOF has been returned, it will keep
+// being returned from both .HandleRune(c) and .HandleEOF() until
+// .Reset() is called.
+//
+// RuneTypeEOF indicates that the rune cannot be appended to the JSON
+// document; a new JSON document must be started in order to process
+// that rune.
+func (par *Parser) HandleRune(c rune) (RuneType, error) {
+ if par.closed {
+ return RuneTypeError, iofs.ErrClosed
+ }
+ if par.err != nil {
+ return RuneTypeError, par.err
+ }
+ if !par.initialized {
+ par.initialized = true
+ par.pushState(RuneTypeError)
+ }
+ if len(par.stack) == 0 {
+ switch c {
+ case 0x0020, 0x000A, 0x000D, 0x0009:
+ return RuneTypeSpace, nil
+ default:
+ return RuneTypeEOF, nil
+ }
+ }
switch par.stack[len(par.stack)-1] {
// any /////////////////////////////////////////////////////////////////////////////////////
case RuneTypeError:
@@ -299,7 +365,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) {
default:
par.replaceState(RuneTypeArrayComma)
par.pushState(RuneTypeError)
- return par.handleRune(c)
+ return par.HandleRune(c)
}
case RuneTypeArrayComma: // waiting for ',' or ']'
switch c {
@@ -419,7 +485,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) {
return par.replaceState(RuneTypeNumberExpE), nil
default:
par.popState()
- return par.handleRune(c)
+ return par.HandleRune(c)
}
case RuneTypeNumberIntDig: // D
switch c {
@@ -431,7 +497,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) {
return par.replaceState(RuneTypeNumberExpE), nil
default:
par.popState()
- return par.handleRune(c)
+ return par.HandleRune(c)
}
case RuneTypeNumberFracDot: // E
switch c {
@@ -448,7 +514,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) {
return par.replaceState(RuneTypeNumberExpE), nil
default:
par.popState()
- return par.handleRune(c)
+ return par.HandleRune(c)
}
case RuneTypeNumberExpE: // G
switch c {
@@ -472,7 +538,7 @@ func (par *Parser) handleRune(c rune) (RuneType, error) {
return par.replaceState(RuneTypeNumberExpDig), nil
default:
par.popState()
- return par.handleRune(c)
+ return par.HandleRune(c)
}
// literals ////////////////////////////////////////////////////////////////////////////////
// true