// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "errors" "io" "git.lukeshu.com/go/lowmemjson/internal" ) type runeTypeScanner interface { // The returned error is a *ReadError, a *SyntaxError, or nil. // An EOF condition is represented as one of: // // end of value but not file: (_, >0, RuneTypeEOF, nil) // end of both value and file: (_, 0, RuneTypeEOF, nil) // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) ReadRuneType() (rune, int, internal.RuneType, error) // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. ReadRune() (rune, int, error) UnreadRune() error Reset() InputOffset() int64 } // runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// type runeTypeScannerImpl struct { // everything that is not "initialized by constructor" starts // out as the zero value. inner io.RuneScanner // initialized by constructor parser internal.Parser // initialized by constructor offset int64 initialized bool repeat bool rRune rune rSize int rType internal.RuneType rErr error } var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) func (sc *runeTypeScannerImpl) Reset() { sc.parser.Reset() if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { sc.repeat = false // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) if err != nil { sc.rErr = &DecodeSyntaxError{ Offset: sc.offset - int64(sc.rSize), Err: err, } } else { sc.rErr = nil } // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed } } func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { switch { case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): // do nothing case sc.repeat: _, _, _ = sc.inner.ReadRune() default: sc.initialized = true var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() sc.offset += int64(sc.rSize) switch err { case nil: sc.rType, err = sc.parser.HandleRune(sc.rRune) if err != nil { sc.rErr = &DecodeSyntaxError{ Offset: sc.offset - int64(sc.rSize), Err: err, } } else { sc.rErr = nil } case io.EOF: sc.rType, err = sc.parser.HandleEOF() if err != nil { sc.rErr = &DecodeSyntaxError{ Offset: sc.offset, Err: err, } } else { sc.rErr = nil } default: sc.rType = 0 sc.rErr = &DecodeReadError{ Offset: sc.offset, Err: err, } } } sc.repeat = false return sc.rRune, sc.rSize, sc.rType, sc.rErr } func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { case internal.RuneTypeEOF: return 0, 0, io.EOF case internal.RuneTypeError: return 0, 0, e default: return r, s, nil } } // UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). // // If the last call to .ReadRune() or .ReadRuneType() has already been // unread, or if that call returned a rune with size 0, then // ErrInvalidUnreadRune is returned. Otherwise, nil is returned. func (sc *runeTypeScannerImpl) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } sc.repeat = true _ = sc.inner.UnreadRune() return nil } func (sc *runeTypeScannerImpl) InputOffset() int64 { ret := sc.offset if sc.repeat { ret -= int64(sc.rSize) } return ret } // noWSRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// type noWSRuneTypeScanner struct { inner runeTypeScanner } var _ runeTypeScanner = (*noWSRuneTypeScanner)(nil) func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { again: r, s, t, e := sc.inner.ReadRuneType() if t == internal.RuneTypeSpace { goto again } return r, s, t, e } func (sc *noWSRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { case internal.RuneTypeEOF: return 0, 0, io.EOF case internal.RuneTypeError: return 0, 0, e default: return r, s, nil } } func (sc *noWSRuneTypeScanner) UnreadRune() error { return sc.inner.UnreadRune() } func (sc *noWSRuneTypeScanner) Reset() { sc.inner.Reset() } func (sc *noWSRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } // elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// type elemRuneTypeScanner struct { inner runeTypeScanner parser internal.Parser repeat bool stuck bool rType internal.RuneType rErr error } var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { // Read it, run it through the parent's parser. r, s, t, e := sc.inner.ReadRuneType() // Run it through our child parser. if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) { if sc.repeat || sc.stuck { sc.repeat = false } else { var err error if s > 0 { sc.rType, err = sc.parser.HandleRune(r) } else { sc.rType, err = sc.parser.HandleEOF() } if err != nil { sc.rErr = &DecodeSyntaxError{ Offset: sc.inner.InputOffset(), Err: err, } } else { sc.rErr = nil } } sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError t, e = sc.rType, sc.rErr } // Check if we need to truncate the result. if t == internal.RuneTypeEOF { if s > 0 { _ = sc.inner.UnreadRune() } return 0, 0, internal.RuneTypeEOF, nil } return r, s, t, e } func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { case internal.RuneTypeEOF: return 0, 0, io.EOF case internal.RuneTypeError: return 0, 0, e default: return r, s, nil } } func (sc *elemRuneTypeScanner) UnreadRune() error { ret := sc.inner.UnreadRune() sc.repeat = true return ret } func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } func (sc *elemRuneTypeScanner) Reset() {}