From 2ba5d96ccba101e6ccbf32b08e2fd18d4b8d7787 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 13 Aug 2022 22:05:20 -0600 Subject: parse_scan: Add reader abstractions on top of Parser --- parse_scan.go | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 parse_scan.go (limited to 'parse_scan.go') diff --git a/parse_scan.go b/parse_scan.go new file mode 100644 index 0000000..e75f1c5 --- /dev/null +++ b/parse_scan.go @@ -0,0 +1,241 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "errors" + "fmt" + "io" +) + +type ReadError struct { + Err error + Offset int64 +} + +func (e *ReadError) Error() string { + return fmt.Sprintf("json: I/O error at input byte %v: %v", e.Offset, e.Err) +} +func (e *ReadError) Unwrap() error { return e.Err } + +type SyntaxError struct { + Err string + Offset int64 +} + +func (e *SyntaxError) Error() string { + return fmt.Sprintf("json: syntax error at input byte %v: %v", e.Offset, e.Err) +} + +type runeTypeScanner interface { + // The returned error is a *ReadError, a *SyntaxError, or nil. + // An EOF condition is represented either as + // + // (char, size, RuneTypeEOF, nil) + // + // or + // + // (char, size, RuneTypeError, &SyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) + ReadRuneType() (rune, int, RuneType, error) + // The returned error is a *ReadError, a *SyntaxError, io.EOF, or nil. + ReadRune() (rune, int, error) + UnreadRune() error + Reset() + InputOffset() int64 +} + +// runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// + +type runeTypeScannerImpl struct { + inner io.RuneReader + + parser Parser + offset int64 + + repeat bool + stuck bool + rRune rune + rSize int + rType RuneType + rErr error +} + +var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) + +func (sc *runeTypeScannerImpl) Reset() { + sc.parser.Reset() + sc.stuck = false + sc.repeat = false +} + +func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) { + switch { + case sc.stuck: + // do nothing + case sc.repeat: + if _, ok := sc.inner.(io.RuneScanner); ok { + sc.inner.ReadRune() + } + default: + var err error + sc.rRune, sc.rSize, err = sc.inner.ReadRune() + sc.offset += int64(sc.rSize) + switch err { + case nil: + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &SyntaxError{ + Offset: sc.offset, + Err: err.Error(), + } + } else { + sc.rErr = nil + } + case io.EOF: + sc.rType, err = sc.parser.HandleEOF() + if err != nil { + sc.rErr = &SyntaxError{ + Offset: sc.offset, + Err: err.Error(), + } + } else { + sc.rErr = nil + } + default: + sc.rType = 0 + sc.rErr = &ReadError{ + Offset: sc.offset, + Err: err, + } + } + } + sc.repeat = false + sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError + return sc.rRune, sc.rSize, sc.rType, sc.rErr +} + +func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { + r, s, t, e := sc.ReadRuneType() + switch t { + case RuneTypeEOF: + return 0, 0, io.EOF + case RuneTypeError: + return 0, 0, e + default: + return r, s, nil + } +} + +var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune") + +// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). If the +// last call to .ReadRune() or .ReadRuneType() has already been +// unread, or if that call returned an error or RuneTypeEOF, then +// ErrInvalidRune is returned. Otherwise, nil is returned. +func (sc *runeTypeScannerImpl) UnreadRune() error { + if sc.stuck || sc.repeat { + return ErrInvalidUnreadRune + } + sc.repeat = true + if rs, ok := sc.inner.(io.RuneScanner); ok { + _ = rs.UnreadRune() + } + return nil +} + +func (sc *runeTypeScannerImpl) InputOffset() int64 { + ret := sc.offset + if sc.repeat { + ret -= int64(sc.rSize) + } + return ret +} + +// noWSRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// + +type noWSRuneTypeScanner struct { + inner runeTypeScanner +} + +var _ runeTypeScanner = (*noWSRuneTypeScanner)(nil) + +func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { +again: + r, s, t, e := sc.inner.ReadRuneType() + if t == RuneTypeSpace { + goto again + } + return r, s, t, e +} + +func (sc *noWSRuneTypeScanner) ReadRune() (rune, int, error) { + r, s, t, e := sc.ReadRuneType() + switch t { + case RuneTypeEOF: + return 0, 0, io.EOF + case RuneTypeError: + return 0, 0, e + default: + return r, s, nil + } +} + +func (sc *noWSRuneTypeScanner) UnreadRune() error { return sc.inner.UnreadRune() } +func (sc *noWSRuneTypeScanner) Reset() { sc.inner.Reset() } +func (sc *noWSRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } + +// elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// + +type elemRuneTypeScanner struct { + inner runeTypeScanner + + parser Parser + repeat bool + rType RuneType +} + +var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) + +func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { + r, s, t, e := sc.inner.ReadRuneType() + + // Check if we need to insert a premature EOF + if t != RuneTypeError && t != RuneTypeEOF { + if sc.repeat { + sc.repeat = false + } else { + sc.rType, _ = sc.parser.HandleRune(r) + } + if sc.rType == RuneTypeEOF { + _ = sc.inner.UnreadRune() + } + t = sc.rType + } + if t == RuneTypeEOF { + return 0, 0, RuneTypeEOF, nil + } + + return r, s, t, e +} + +func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { + r, s, t, e := sc.ReadRuneType() + switch t { + case RuneTypeEOF: + return 0, 0, io.EOF + case RuneTypeError: + return 0, 0, e + default: + return r, s, nil + } +} + +func (sc *elemRuneTypeScanner) UnreadRune() error { + sc.repeat = true + return sc.inner.UnreadRune() +} + +func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } +func (sc *elemRuneTypeScanner) Reset() {} -- cgit v1.2.3-2-g168b