From 4148776399cb7ea5e10c74dc465e4e1e682cb399 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 00:07:39 -0700 Subject: Move the Parser to the internal package --- compat/json/compat_test.go | 6 +- decode.go | 184 ++++++------ decode_scan.go | 44 +-- decode_scan_test.go | 350 +++++++++++------------ encode.go | 2 +- errors.go | 4 +- internal/export.go | 16 -- internal/export_tags.go | 16 ++ internal/hex.go | 20 ++ internal/parse.go | 690 +++++++++++++++++++++++++++++++++++++++++++++ misc.go | 25 +- parse.go | 683 -------------------------------------------- reencode.go | 82 +++--- 13 files changed, 1073 insertions(+), 1049 deletions(-) delete mode 100644 internal/export.go create mode 100644 internal/export_tags.go create mode 100644 internal/hex.go create mode 100644 internal/parse.go delete mode 100644 parse.go diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index 997d07e..23ee977 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Luke Shumaker +// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later @@ -22,13 +22,13 @@ func checkValid(in []byte, scan *lowmemjson.ReEncoder) error { } func isValidNumber(s string) bool { - var parser lowmemjson.Parser + var parser internal.Parser for _, r := range s { if t, _ := parser.HandleRune(r); !t.IsNumber() { return false } } - if t, _ := parser.HandleEOF(); t == lowmemjson.RuneTypeError { + if t, _ := parser.HandleEOF(); t == internal.RuneTypeError { return false } return true diff --git a/decode.go b/decode.go index fb94ba8..51c1ed5 100644 --- a/decode.go +++ b/decode.go @@ -15,6 +15,8 @@ import ( "strings" "unicode/utf16" "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal" ) type Decodable interface { @@ -45,7 +47,7 @@ func NewDecoder(r io.RuneScanner) *Decoder { io: &noWSRuneTypeScanner{ inner: &runeTypeScannerImpl{ inner: r, - parser: Parser{ + parser: internal.Parser{ MaxDepth: 
maxNestingDepth, }, }, @@ -61,7 +63,7 @@ func (dec *Decoder) More() bool { dec.io.Reset() _, _, t, e := dec.io.ReadRuneType() _ = dec.io.UnreadRune() // best effort - return e == nil && t != RuneTypeEOF + return e == nil && t != internal.RuneTypeEOF } func (dec *Decoder) stackPush(par reflect.Type, idx any) { @@ -110,7 +112,7 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) { return err } c, s, t, _ := dec.io.ReadRuneType() - if t != RuneTypeEOF { + if t != internal.RuneTypeEOF { panic("should not happen") } if s > 0 { @@ -171,7 +173,7 @@ func (dec *Decoder) panicType(jTyp string, gTyp reflect.Type, err error) { }) } -func (dec *Decoder) readRune() (rune, RuneType) { +func (dec *Decoder) readRune() (rune, internal.RuneType) { c, _, t, e := dec.io.ReadRuneType() if e != nil { panic(decodeError{ @@ -192,23 +194,23 @@ func (dec *Decoder) unreadRune() { } } -func (dec *Decoder) peekRuneType() RuneType { +func (dec *Decoder) peekRuneType() internal.RuneType { _, t := dec.readRune() dec.unreadRune() return t } -func (dec *Decoder) expectRune(ec rune, et RuneType) { +func (dec *Decoder) expectRune(ec rune, et internal.RuneType) { ac, at := dec.readRune() if ac != ec || at != et { panic("should not happen") } } -func (dec *Decoder) expectRuneType(ec rune, et RuneType, gt reflect.Type) { +func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Type) { ac, at := dec.readRune() if ac != ec || at != et { - dec.panicType(at.jsonType(), gt, nil) + dec.panicType(at.JSONType(), gt, nil) } } @@ -216,7 +218,7 @@ type decRuneTypeScanner struct { dec *Decoder } -func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { +func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { c, s, t, e := sc.dec.io.ReadRuneType() if e != nil { panic(decodeError{ @@ -231,7 +233,7 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, 
_ := sc.ReadRuneType() switch t { - case RuneTypeEOF: + case internal.RuneTypeEOF: return 0, 0, io.EOF default: return r, s, nil @@ -287,17 +289,17 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { var buf bytes.Buffer dec.scan(&buf) if err := val.Addr().Interface().(*json.RawMessage).UnmarshalJSON(buf.Bytes()); err != nil { - dec.panicType(t.jsonType(), typ, err) + dec.panicType(t.JSONType(), typ, err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(decodableType): t := dec.peekRuneType() obj := val.Addr().Interface().(Decodable) l := dec.limitingScanner() if err := obj.DecodeJSON(l); err != nil { - dec.panicType(t.jsonType(), typ, err) + dec.panicType(t.JSONType(), typ, err) } if _, _, err := l.ReadRune(); err != io.EOF { - dec.panicType(t.jsonType(), typ, fmt.Errorf("did not consume entire %s", t.jsonType())) + dec.panicType(t.JSONType(), typ, fmt.Errorf("did not consume entire %s", t.JSONType())) } case val.CanAddr() && reflect.PointerTo(typ).Implements(jsonUnmarshalerType): t := dec.peekRuneType() @@ -305,10 +307,10 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.scan(&buf) obj := val.Addr().Interface().(json.Unmarshaler) if err := obj.UnmarshalJSON(buf.Bytes()); err != nil { - dec.panicType(t.jsonType(), typ, err) + dec.panicType(t.JSONType(), typ, err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(textUnmarshalerType): - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -321,13 +323,13 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { default: switch kind := typ.Kind(); kind { case reflect.Bool: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } val.SetBool(dec.decodeBool(typ)) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if 
nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -339,7 +341,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetInt(n) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -351,7 +353,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetUint(n) case reflect.Float32, reflect.Float64: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -363,7 +365,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetFloat(n) case reflect.String: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -372,7 +374,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { t := dec.peekRuneType() dec.scan(&buf) if !t.IsNumber() { - dec.panicType(t.jsonType(), typ, + dec.panicType(t.JSONType(), typ, fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", buf.String())) } @@ -383,7 +385,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } case reflect.Interface: if typ.NumMethod() > 0 { - dec.panicType(dec.peekRuneType().jsonType(), typ, ErrDecodeNonEmptyInterface) + dec.panicType(dec.peekRuneType().JSONType(), typ, ErrDecodeNonEmptyInterface) } // If the interface stores a pointer, try to use the type information of the pointer. if !val.IsNil() && val.Elem().Kind() == reflect.Pointer { @@ -404,25 +406,25 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { // in the loop) because the only way it's possible is if there's // an interface in there, which'd break from the loop on its own. 
// - // ptr.CanSet() || dec.peekRuneType() != RuneTypeNullN + // ptr.CanSet() || dec.peekRuneType() != internal.RuneTypeNullN // // We only need the pointer itself to be settable if we're // decoding null. - if ptr.Elem() != val && (ptr.CanSet() || dec.peekRuneType() != RuneTypeNullN) { + if ptr.Elem() != val && (ptr.CanSet() || dec.peekRuneType() != internal.RuneTypeNullN) { dec.decode(ptr, false) break } } // Couldn't get type information from a pointer; fall back to untyped mode. switch dec.peekRuneType() { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: val.Set(reflect.ValueOf(dec.decodeAny())) } case reflect.Struct: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -461,7 +463,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", fVal.Type().Elem())) } - if dec.peekRuneType() != RuneTypeNullN { + if dec.peekRuneType() != internal.RuneTypeNullN { if fVal.IsNil() { fVal.Set(reflect.New(fVal.Type().Elem())) } @@ -472,7 +474,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } if field.Quote { switch t := dec.peekRuneType(); t { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() switch fVal.Kind() { // XXX: I can't justify this list, other than "it's what encoding/json @@ -482,7 +484,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { default: // do nothing??? } - case RuneTypeStringBeg: + case internal.RuneTypeStringBeg: // TODO: Figure out how to do this without buffering, have correct offsets. 
var buf bytes.Buffer dec.decodeString(nil, &buf) @@ -494,7 +496,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } default: - dec.panicType(t.jsonType(), fVal.Type(), + dec.panicType(t.JSONType(), fVal.Type(), fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", fVal.Type())) } @@ -504,10 +506,10 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { }) case reflect.Map: switch t := dec.peekRuneType(); t { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case RuneTypeObjectBeg: + case internal.RuneTypeObjectBeg: if val.IsNil() { val.Set(reflect.MakeMap(typ)) } @@ -553,19 +555,19 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { val.SetMapIndex(nameValPtr.Elem(), fValPtr.Elem()) }) default: - dec.panicType(t.jsonType(), typ, nil) + dec.panicType(t.JSONType(), typ, nil) } case reflect.Slice: switch { - case typ.Elem().Kind() == reflect.Uint8 && !(dec.peekRuneType() == RuneTypeArrayBeg && (false || + case typ.Elem().Kind() == reflect.Uint8 && !(dec.peekRuneType() == internal.RuneTypeArrayBeg && (false || reflect.PointerTo(typ.Elem()).Implements(decodableType) || reflect.PointerTo(typ.Elem()).Implements(jsonUnmarshalerType) || reflect.PointerTo(typ.Elem()).Implements(textUnmarshalerType))): switch t := dec.peekRuneType(); t { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case RuneTypeStringBeg: + case internal.RuneTypeStringBeg: if typ.Elem() == byteType { var buf bytes.Buffer dec.decodeString(typ, newBase64Decoder(&buf)) @@ -582,14 +584,14 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } default: - dec.panicType(t.jsonType(), typ, nil) + dec.panicType(t.JSONType(), typ, nil) } default: switch t := dec.peekRuneType(); t { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case RuneTypeArrayBeg: + case 
internal.RuneTypeArrayBeg: if val.IsNil() { val.Set(reflect.MakeSlice(typ, 0, 0)) } @@ -606,11 +608,11 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { i++ }) default: - dec.panicType(t.jsonType(), typ, nil) + dec.panicType(t.JSONType(), typ, nil) } } case reflect.Array: - if nullOK && dec.peekRuneType() == RuneTypeNullN { + if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { dec.decodeNull() return } @@ -633,7 +635,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } case reflect.Pointer: switch dec.peekRuneType() { - case RuneTypeNullN: + case internal.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: @@ -661,7 +663,7 @@ func (dec *Decoder) scan(out io.Writer) { func (dec *Decoder) scanNumber(gTyp reflect.Type, out io.Writer) { if t := dec.peekRuneType(); !t.IsNumber() { - dec.panicType(t.jsonType(), gTyp, nil) + dec.panicType(t.JSONType(), gTyp, nil) } dec.scan(out) } @@ -746,33 +748,33 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeVal(l); err != nil { - dec.panicType(t.jsonType(), nil, err) + dec.panicType(t.JSONType(), nil, err) } if _, _, err := l.ReadRune(); err != io.EOF { - dec.panicType(t.jsonType(), nil, fmt.Errorf("did not consume entire %s", t.jsonType())) + dec.panicType(t.JSONType(), nil, fmt.Errorf("did not consume entire %s", t.JSONType())) } }) return } func (dec *Decoder) decodeObject(gTyp reflect.Type, decodeKey, decodeVal func()) { - dec.expectRuneType('{', RuneTypeObjectBeg, gTyp) + dec.expectRuneType('{', internal.RuneTypeObjectBeg, gTyp) _, t := dec.readRune() switch t { - case RuneTypeObjectEnd: + case internal.RuneTypeObjectEnd: return - case RuneTypeStringBeg: + case internal.RuneTypeStringBeg: decodeMember: dec.unreadRune() decodeKey() - dec.expectRune(':', RuneTypeObjectColon) + dec.expectRune(':', internal.RuneTypeObjectColon) decodeVal() _, t := dec.readRune() switch t { - 
case RuneTypeObjectComma: - dec.expectRune('"', RuneTypeStringBeg) + case internal.RuneTypeObjectComma: + dec.expectRune('"', internal.RuneTypeStringBeg) goto decodeMember - case RuneTypeObjectEnd: + case internal.RuneTypeObjectEnd: return default: panic("should not happen") @@ -799,20 +801,20 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e t := dec.peekRuneType() l := dec.limitingScanner() if err := decodeMember(l); err != nil { - dec.panicType(t.jsonType(), nil, err) + dec.panicType(t.JSONType(), nil, err) } if _, _, err := l.ReadRune(); err != io.EOF { - dec.panicType(t.jsonType(), nil, fmt.Errorf("did not consume entire %s", t.jsonType())) + dec.panicType(t.JSONType(), nil, fmt.Errorf("did not consume entire %s", t.JSONType())) } }) return } func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { - dec.expectRuneType('[', RuneTypeArrayBeg, gTyp) + dec.expectRuneType('[', internal.RuneTypeArrayBeg, gTyp) _, t := dec.readRune() switch t { - case RuneTypeArrayEnd: + case internal.RuneTypeArrayEnd: return default: dec.unreadRune() @@ -820,9 +822,9 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { decodeMember() _, t := dec.readRune() switch t { - case RuneTypeArrayComma: + case internal.RuneTypeArrayComma: goto decodeNextMember - case RuneTypeArrayEnd: + case internal.RuneTypeArrayEnd: return default: panic("should not happen") @@ -831,16 +833,16 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { } func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { - dec.expectRuneType('"', RuneTypeStringBeg, gTyp) + dec.expectRuneType('"', internal.RuneTypeStringBeg, gTyp) var uhex [4]byte for { c, t := dec.readRune() switch t { - case RuneTypeStringChar: + case internal.RuneTypeStringChar: _, _ = writeRune(out, c) - case RuneTypeStringEsc, RuneTypeStringEscU: + case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: // do nothing - case 
RuneTypeStringEsc1: + case internal.RuneTypeStringEsc1: switch c { case '"': _, _ = writeRune(out, '"') @@ -861,14 +863,14 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { default: panic("should not happen") } - case RuneTypeStringEscUA: - uhex[0], _ = hex2int(c) - case RuneTypeStringEscUB: - uhex[1], _ = hex2int(c) - case RuneTypeStringEscUC: - uhex[2], _ = hex2int(c) - case RuneTypeStringEscUD: - uhex[3], _ = hex2int(c) + case internal.RuneTypeStringEscUA: + uhex[0], _ = internal.HexToInt(c) + case internal.RuneTypeStringEscUB: + uhex[1], _ = internal.HexToInt(c) + case internal.RuneTypeStringEscUC: + uhex[2], _ = internal.HexToInt(c) + case internal.RuneTypeStringEscUD: + uhex[3], _ = internal.HexToInt(c) c = 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | @@ -876,25 +878,25 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { rune(uhex[3])<<0 handleUnicode: if utf16.IsSurrogate(c) { - if dec.peekRuneType() != RuneTypeStringEsc { + if dec.peekRuneType() != internal.RuneTypeStringEsc { _, _ = writeRune(out, utf8.RuneError) break } - dec.expectRune('\\', RuneTypeStringEsc) - if dec.peekRuneType() != RuneTypeStringEscU { + dec.expectRune('\\', internal.RuneTypeStringEsc) + if dec.peekRuneType() != internal.RuneTypeStringEscU { _, _ = writeRune(out, utf8.RuneError) break } - dec.expectRune('u', RuneTypeStringEscU) + dec.expectRune('u', internal.RuneTypeStringEscU) b, _ := dec.readRune() - uhex[0], _ = hex2int(b) + uhex[0], _ = internal.HexToInt(b) b, _ = dec.readRune() - uhex[1], _ = hex2int(b) + uhex[1], _ = internal.HexToInt(b) b, _ = dec.readRune() - uhex[2], _ = hex2int(b) + uhex[2], _ = internal.HexToInt(b) b, _ = dec.readRune() - uhex[3], _ = hex2int(b) + uhex[3], _ = internal.HexToInt(b) c2 := 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | @@ -910,7 +912,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { } else { _, _ = writeRune(out, c) } - case RuneTypeStringEnd: + case internal.RuneTypeStringEnd: 
return default: panic("should not happen") @@ -922,25 +924,25 @@ func (dec *Decoder) decodeBool(gTyp reflect.Type) bool { c, t := dec.readRune() switch c { case 't': - dec.expectRune('r', RuneTypeTrueR) - dec.expectRune('u', RuneTypeTrueU) - dec.expectRune('e', RuneTypeTrueE) + dec.expectRune('r', internal.RuneTypeTrueR) + dec.expectRune('u', internal.RuneTypeTrueU) + dec.expectRune('e', internal.RuneTypeTrueE) return true case 'f': - dec.expectRune('a', RuneTypeFalseA) - dec.expectRune('l', RuneTypeFalseL) - dec.expectRune('s', RuneTypeFalseS) - dec.expectRune('e', RuneTypeFalseE) + dec.expectRune('a', internal.RuneTypeFalseA) + dec.expectRune('l', internal.RuneTypeFalseL) + dec.expectRune('s', internal.RuneTypeFalseS) + dec.expectRune('e', internal.RuneTypeFalseE) return false default: - dec.panicType(t.jsonType(), gTyp, nil) + dec.panicType(t.JSONType(), gTyp, nil) panic("not reached") } } func (dec *Decoder) decodeNull() { - dec.expectRune('n', RuneTypeNullN) - dec.expectRune('u', RuneTypeNullU) - dec.expectRune('l', RuneTypeNullL1) - dec.expectRune('l', RuneTypeNullL2) + dec.expectRune('n', internal.RuneTypeNullN) + dec.expectRune('u', internal.RuneTypeNullU) + dec.expectRune('l', internal.RuneTypeNullL1) + dec.expectRune('l', internal.RuneTypeNullL2) } diff --git a/decode_scan.go b/decode_scan.go index eee61fc..5e33760 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Luke Shumaker +// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,6 +7,8 @@ package lowmemjson import ( "errors" "io" + + "git.lukeshu.com/go/lowmemjson/internal" ) type runeTypeScanner interface { @@ -17,7 +19,7 @@ type runeTypeScanner interface { // end of both value and file: (_, 0, RuneTypeEOF, nil) // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: 
io.EOF}) - ReadRuneType() (rune, int, RuneType, error) + ReadRuneType() (rune, int, internal.RuneType, error) // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. ReadRune() (rune, int, error) UnreadRune() error @@ -32,7 +34,7 @@ type runeTypeScannerImpl struct { initialized bool - parser Parser + parser internal.Parser offset int64 repeat bool @@ -40,7 +42,7 @@ type runeTypeScannerImpl struct { rRune rune rRuneOK bool rSize int - rType RuneType + rType internal.RuneType rErr error } @@ -48,7 +50,7 @@ var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) func (sc *runeTypeScannerImpl) Reset() { sc.parser.Reset() - unread := sc.stuck && sc.rType == RuneTypeEOF && sc.rRuneOK + unread := sc.stuck && sc.rType == internal.RuneTypeEOF && sc.rRuneOK sc.stuck = false sc.repeat = false if unread { @@ -68,7 +70,7 @@ func (sc *runeTypeScannerImpl) Reset() { } } -func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) { +func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { switch { case sc.stuck: // do nothing @@ -110,16 +112,16 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) { } sc.initialized = true sc.repeat = false - sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError + sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError return sc.rRune, sc.rSize, sc.rType, sc.rErr } func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { - case RuneTypeEOF: + case internal.RuneTypeEOF: return 0, 0, io.EOF - case RuneTypeError: + case internal.RuneTypeError: return 0, 0, e default: return r, s, nil @@ -156,10 +158,10 @@ type noWSRuneTypeScanner struct { var _ runeTypeScanner = (*noWSRuneTypeScanner)(nil) -func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { +func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { again: r, s, t, e := 
sc.inner.ReadRuneType() - if t == RuneTypeSpace { + if t == internal.RuneTypeSpace { goto again } return r, s, t, e @@ -168,9 +170,9 @@ again: func (sc *noWSRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { - case RuneTypeEOF: + case internal.RuneTypeEOF: return 0, 0, io.EOF - case RuneTypeError: + case internal.RuneTypeError: return 0, 0, e default: return r, s, nil @@ -186,16 +188,16 @@ func (sc *noWSRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset type elemRuneTypeScanner struct { inner runeTypeScanner - parser Parser + parser internal.Parser repeat bool stuck bool - rType RuneType + rType internal.RuneType rErr error } var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) -func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { +func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { // Read it, run it through the parent's parser. r, s, t, e := sc.inner.ReadRuneType() @@ -219,16 +221,16 @@ func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { sc.rErr = nil } } - sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError + sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError t, e = sc.rType, sc.rErr } // Check if we need to truncate the result. 
- if t == RuneTypeEOF { + if t == internal.RuneTypeEOF { if s > 0 { _ = sc.inner.UnreadRune() } - return 0, 0, RuneTypeEOF, nil + return 0, 0, internal.RuneTypeEOF, nil } return r, s, t, e @@ -237,9 +239,9 @@ func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, e := sc.ReadRuneType() switch t { - case RuneTypeEOF: + case internal.RuneTypeEOF: return 0, 0, io.EOF - case RuneTypeError: + case internal.RuneTypeError: return 0, 0, e default: return r, s, nil diff --git a/decode_scan_test.go b/decode_scan_test.go index 70e2874..ac8c2fc 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Luke Shumaker +// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later @@ -12,12 +12,14 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "git.lukeshu.com/go/lowmemjson/internal" ) type ReadRuneTypeResult struct { r rune s int - t RuneType + t internal.RuneType e error } @@ -57,105 +59,105 @@ func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestc func TestRuneTypeScanner(t *testing.T) { testcases := map[string]runeTypeScannerTestcase{ "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {' ', 1, RuneTypeSpace, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + 
{'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {' ', 1, internal.RuneTypeSpace, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {' ', 1, RuneTypeSpace, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {' ', 1, internal.RuneTypeSpace, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, -1, 0, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "unread2": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, 
nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {' ', 1, RuneTypeSpace, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {' ', 1, internal.RuneTypeSpace, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, -1, 0, nil}, {0, -1, 0, ErrInvalidUnreadRune}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {' ', 1, RuneTypeSpace, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, 
internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {' ', 1, internal.RuneTypeSpace, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, {0, -1, 0, ErrInvalidUnreadRune}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, RuneTypeArrayBeg, nil}, - {'[', 1, RuneTypeArrayBeg, nil}, - {'0', 1, RuneTypeNumberIntZero, nil}, - {',', 1, RuneTypeArrayComma, nil}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'0', 1, internal.RuneTypeNumberIntZero, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ - {'1', 1, 
RuneTypeNumberIntDig, nil}, - {'{', 1, RuneTypeEOF, nil}, - {'{', 1, RuneTypeEOF, nil}, - {'{', 1, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, }}, "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, }}, "empty": {``, ``, []ReadRuneTypeResult{ - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { @@ -168,66 +170,66 @@ func TestRuneTypeScanner(t *testing.T) { func TestNoWSRuneTypeScanner(t *testing.T) { testcases := map[string]runeTypeScannerTestcase{ "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {'1', 1, 
RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, -1, 0, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, 
nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "tail": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ - {'1', 1, RuneTypeNumberIntDig, nil}, - {'{', 1, RuneTypeEOF, nil}, - {'{', 1, RuneTypeEOF, nil}, - {'{', 1, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, + {'{', 1, internal.RuneTypeEOF, nil}, }}, "early-eof": {` {`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, 
&DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { @@ -242,37 +244,37 @@ func TestNoWSRuneTypeScanner(t *testing.T) { func TestElemRuneTypeScanner(t *testing.T) { testcases := map[string]runeTypeScannerTestcase{ "basic": {`1`, ``, []ReadRuneTypeResult{ - {'1', 1, RuneTypeNumberIntDig, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, RuneTypeArrayBeg, nil}, - {'[', 1, RuneTypeArrayBeg, nil}, - {'0', 1, RuneTypeNumberIntZero, nil}, - {',', 1, RuneTypeArrayComma, nil}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'0', 1, internal.RuneTypeNumberIntZero, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for 
beginning of value", ']')}}, + {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{ - {'1', 1, RuneTypeNumberIntDig, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "fragment": {`1,`, `,`, []ReadRuneTypeResult{ - {'1', 1, RuneTypeNumberIntDig, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, }}, "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, RuneTypeObjectBeg, nil}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, }}, } t.Run("top-level", func(t *testing.T) { @@ -306,7 +308,7 @@ func TestElemRuneTypeScanner(t *testing.T) { var res ReadRuneTypeResult res.r, res.s, res.t, res.e = inner.ReadRuneType() require.Equal(t, - ReadRuneTypeResult{'[', 1, RuneTypeArrayBeg, nil}.String(), + ReadRuneTypeResult{'[', 1, internal.RuneTypeArrayBeg, nil}.String(), res.String()) return &elemRuneTypeScanner{ @@ -323,13 +325,13 @@ func TestElemRuneTypeScanner2(t *testing.T) { }, } exp := []ReadRuneTypeResult{ - {'{', 1, 
RuneTypeObjectBeg, nil}, - {'"', 1, RuneTypeStringBeg, nil}, - {'f', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'o', 1, RuneTypeStringChar, nil}, - {'"', 1, RuneTypeStringEnd, nil}, - {':', 1, RuneTypeObjectColon, nil}, + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, } var expStr, actStr []string for _, iExp := range exp { @@ -344,12 +346,12 @@ func TestElemRuneTypeScanner2(t *testing.T) { inner: parent, } exp = []ReadRuneTypeResult{ - {'1', 1, RuneTypeNumberIntDig, nil}, - {'2', 1, RuneTypeNumberIntDig, nil}, - {'.', 1, RuneTypeNumberFracDot, nil}, - {'0', 1, RuneTypeNumberFracDig, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, } expStr, actStr = nil, nil for _, iExp := range exp { @@ -361,9 +363,9 @@ func TestElemRuneTypeScanner2(t *testing.T) { } exp = []ReadRuneTypeResult{ - {'}', 1, RuneTypeObjectEnd, nil}, - {0, 0, RuneTypeEOF, nil}, - {0, 0, RuneTypeEOF, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, } expStr, actStr = nil, nil for _, iExp := range exp { diff --git a/encode.go b/encode.go index 85584ef..6963e3c 100644 --- a/encode.go +++ b/encode.go @@ -61,7 +61,7 @@ func NewEncoder(w io.Writer) *Encoder { } return &Encoder{ w: re, - closeAfterEncode: len(re.par.stack) == 0 || (len(re.par.stack) == 1 && re.par.stack[0] == RuneTypeError), + closeAfterEncode: re.par.StackIsEmpty(), } } diff --git a/errors.go 
b/errors.go index a2d88bf..67fe6c9 100644 --- a/errors.go +++ b/errors.go @@ -10,6 +10,8 @@ import ( "fmt" "reflect" "strings" + + "git.lukeshu.com/go/lowmemjson/internal" ) var ( @@ -19,7 +21,7 @@ var ( // parser errors /////////////////////////////////////////////////////////////////////////////////// var ( - ErrParserExceededMaxDepth = errors.New("exceeded max depth") + ErrParserExceededMaxDepth = internal.ErrParserExceededMaxDepth ) // low-level decode errors ///////////////////////////////////////////////////////////////////////// diff --git a/internal/export.go b/internal/export.go deleted file mode 100644 index d8cf622..0000000 --- a/internal/export.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "io" -) - -var ParseTag = parseTag - -var ( - EncodeStringFromBytes func(io.Writer, []byte) - EncodeStringFromString func(io.Writer, string) -) diff --git a/internal/export_tags.go b/internal/export_tags.go new file mode 100644 index 0000000..d8cf622 --- /dev/null +++ b/internal/export_tags.go @@ -0,0 +1,16 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "io" +) + +var ParseTag = parseTag + +var ( + EncodeStringFromBytes func(io.Writer, []byte) + EncodeStringFromString func(io.Writer, string) +) diff --git a/internal/hex.go b/internal/hex.go new file mode 100644 index 0000000..9ef78eb --- /dev/null +++ b/internal/hex.go @@ -0,0 +1,20 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +const Hex = "0123456789abcdef" + +func HexToInt[T interface{ byte | rune }](c T) (byte, bool) { + switch { + case '0' <= c && c <= '9': + return byte(c) - '0', true + case 'a' <= c && c <= 'f': + return byte(c) - 'a' + 10, true + case 'A' <= c && c <= 'F': + return byte(c) - 'A' + 10, true + default: + return 0, false + } +} diff --git 
a/internal/parse.go b/internal/parse.go new file mode 100644 index 0000000..12d7600 --- /dev/null +++ b/internal/parse.go @@ -0,0 +1,690 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "errors" + "fmt" + "io" + iofs "io/fs" + "strings" +) + +var ErrParserExceededMaxDepth = errors.New("exceeded max depth") + +type RuneType uint8 + +const ( + RuneTypeError = RuneType(iota) + + RuneTypeSpace // whitespace + + RuneTypeObjectBeg // '{' + RuneTypeObjectColon // ':' + RuneTypeObjectComma // ',' + RuneTypeObjectEnd // '}' + + RuneTypeArrayBeg // '[' + RuneTypeArrayComma // ',' + RuneTypeArrayEnd // ']' + + RuneTypeStringBeg // opening '"' + RuneTypeStringChar // normal character + RuneTypeStringEsc // backslash + RuneTypeStringEsc1 // single-char after a backslash + RuneTypeStringEscU // \uABCD : u + RuneTypeStringEscUA // \uABCD : A + RuneTypeStringEscUB // \uABCD : B + RuneTypeStringEscUC // \uABCD : C + RuneTypeStringEscUD // \uABCD : D + RuneTypeStringEnd // closing '"' + + RuneTypeNumberIntNeg + RuneTypeNumberIntZero + RuneTypeNumberIntDig + RuneTypeNumberFracDot + RuneTypeNumberFracDig + RuneTypeNumberExpE + RuneTypeNumberExpSign + RuneTypeNumberExpDig + + RuneTypeTrueT + RuneTypeTrueR + RuneTypeTrueU + RuneTypeTrueE + + RuneTypeFalseF + RuneTypeFalseA + RuneTypeFalseL + RuneTypeFalseS + RuneTypeFalseE + + RuneTypeNullN + RuneTypeNullU + RuneTypeNullL1 + RuneTypeNullL2 + + RuneTypeEOF +) + +func (t RuneType) GoString() string { + str, ok := map[RuneType]string{ + RuneTypeError: "RuneTypeError", + + RuneTypeSpace: "RuneTypeSpace", + + RuneTypeObjectBeg: "RuneTypeObjectBeg", + RuneTypeObjectColon: "RuneTypeObjectColon", + RuneTypeObjectComma: "RuneTypeObjectComma", + RuneTypeObjectEnd: "RuneTypeObjectEnd", + + RuneTypeArrayBeg: "RuneTypeArrayBeg", + RuneTypeArrayComma: "RuneTypeArrayComma", + RuneTypeArrayEnd: "RuneTypeArrayEnd", + + RuneTypeStringBeg: "RuneTypeStringBeg", + 
RuneTypeStringChar: "RuneTypeStringChar", + RuneTypeStringEsc: "RuneTypeStringEsc", + RuneTypeStringEsc1: "RuneTypeStringEsc1", + RuneTypeStringEscU: "RuneTypeStringEscU", + RuneTypeStringEscUA: "RuneTypeStringEscUA", + RuneTypeStringEscUB: "RuneTypeStringEscUB", + RuneTypeStringEscUC: "RuneTypeStringEscUC", + RuneTypeStringEscUD: "RuneTypeStringEscUD", + RuneTypeStringEnd: "RuneTypeStringEnd", + + RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", + RuneTypeNumberIntZero: "RuneTypeNumberIntZero", + RuneTypeNumberIntDig: "RuneTypeNumberIntDig", + RuneTypeNumberFracDot: "RuneTypeNumberFracDot", + RuneTypeNumberFracDig: "RuneTypeNumberFracDig", + RuneTypeNumberExpE: "RuneTypeNumberExpE", + RuneTypeNumberExpSign: "RuneTypeNumberExpSign", + RuneTypeNumberExpDig: "RuneTypeNumberExpDig", + + RuneTypeTrueT: "RuneTypeTrueT", + RuneTypeTrueR: "RuneTypeTrueR", + RuneTypeTrueU: "RuneTypeTrueU", + RuneTypeTrueE: "RuneTypeTrueE", + + RuneTypeFalseF: "RuneTypeFalseF", + RuneTypeFalseA: "RuneTypeFalseA", + RuneTypeFalseL: "RuneTypeFalseL", + RuneTypeFalseS: "RuneTypeFalseS", + RuneTypeFalseE: "RuneTypeFalseE", + + RuneTypeNullN: "RuneTypeNullN", + RuneTypeNullU: "RuneTypeNullU", + RuneTypeNullL1: "RuneTypeNullL1", + RuneTypeNullL2: "RuneTypeNullL2", + + RuneTypeEOF: "RuneTypeEOF", + }[t] + if ok { + return str + } + return fmt.Sprintf("RuneType(%d)", t) +} + +func (t RuneType) String() string { + str, ok := map[RuneType]string{ + RuneTypeError: "x", + + RuneTypeSpace: " ", + + RuneTypeObjectBeg: "{", + RuneTypeObjectColon: ":", + RuneTypeObjectComma: "o", + RuneTypeObjectEnd: "}", + + RuneTypeArrayBeg: "[", + RuneTypeArrayComma: "a", + RuneTypeArrayEnd: "]", + + RuneTypeStringBeg: "โ€œ", + RuneTypeStringChar: "c", + RuneTypeStringEsc: "\\", + RuneTypeStringEsc1: "b", + RuneTypeStringEscU: "u", + RuneTypeStringEscUA: "A", + RuneTypeStringEscUB: "B", + RuneTypeStringEscUC: "C", + RuneTypeStringEscUD: "D", + RuneTypeStringEnd: "โ€", + + RuneTypeNumberIntNeg: "-", + 
RuneTypeNumberIntZero: "0", + RuneTypeNumberIntDig: "1", + RuneTypeNumberFracDot: ".", + RuneTypeNumberFracDig: "2", + RuneTypeNumberExpE: "e", + RuneTypeNumberExpSign: "+", + RuneTypeNumberExpDig: "3", + + RuneTypeTrueT: "๐•ฅ", // double-struck + RuneTypeTrueR: "๐•ฃ", + RuneTypeTrueU: "๐•ฆ", + RuneTypeTrueE: "๐•–", + + RuneTypeFalseF: "๐”ฃ", // fraktur + RuneTypeFalseA: "๐”ž", + RuneTypeFalseL: "๐”ฉ", + RuneTypeFalseS: "๐”ฐ", + RuneTypeFalseE: "๐”ข", + + RuneTypeNullN: "โ“", // circled + RuneTypeNullU: "โ“ค", + RuneTypeNullL1: "โ“›", + RuneTypeNullL2: "โ“", // +uppercase + + RuneTypeEOF: "$", + }[t] + if ok { + return str + } + return fmt.Sprintf("<%d>", t) +} + +func (t RuneType) JSONType() string { + return map[RuneType]string{ + RuneTypeObjectBeg: "object", + RuneTypeArrayBeg: "array", + RuneTypeStringBeg: "string", + RuneTypeNumberIntNeg: "number", + RuneTypeNumberIntZero: "number", + RuneTypeNumberIntDig: "number", + RuneTypeTrueT: "true", + RuneTypeFalseF: "false", + RuneTypeNullN: "null", + RuneTypeEOF: "eof", + }[t] +} + +func (t RuneType) IsNumber() bool { + return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig +} + +type Parser struct { + // Setting MaxDepth to a value greater than 0 causes + // HandleRune to return ErrParserExceededMaxDepth if + // objects/arrays become nested more deeply than this. + MaxDepth int + + initialized bool + + err error + closed bool + + // We reuse RuneTypes to store the stack. The base idea is + // that, stack items are "the most recently read + // stack-relevant RuneType". + // + // We treat RuneTypeError as a wildcard.
+ // + // The "normal"stack-relevant RuneTypes are: + // + // โ€œ\uABC for strings + // -01.2e+3 for numbers + // ๐•ฅ๐•ฃ๐•ฆ for "true" + // ๐”ฃ๐”ž๐”ฉ๐”ฐ for "false" + // โ“โ“คโ“› for "null" + // + // Objects and arrays break the "most recently read RuneType" + // rule; they need some special assignments: + // + // { object: waiting for key to start or '}' + // โ€ object: reading key / waiting for colon + // : object: waiting for value to start + // o object: reading value / waiting for ',' or '}' + // + // [ array: waiting for item to start or ']' + // a array: reading item / waiting for ',' or ']' + // ] array: waiting for item to start + // + // Within each element type, the stack item is replaced, not pushed. + // + // For example, given the input string + // + // {"x":"y","a":"b"} + // + // The stack would be + // + // stack processed + // x + // { { + // โ€โ€œ {" + // โ€โ€œ {"x + // โ€ {"x" + // : {"x": + // oโ€œ {"x":" + // oโ€œ {"x":"y + // o {"x":"y" + // { {"x":"y", + // โ€โ€œ {"x":"y"," + // โ€โ€œ {"x":"y","a + // โ€ {"x":"y","a" + // : {"x":"y","a": + // oโ€œ {"x":"y","a":" + // oโ€œ {"x":"y","a":"b + // o {"x":"y","a":"b" + // {"x":"y","a":"b"} + stack []RuneType +} + +func (par *Parser) pushState(state RuneType) RuneType { + par.stack = append(par.stack, state) + return state +} +func (par *Parser) replaceState(state RuneType) RuneType { + par.stack[len(par.stack)-1] = state + return state +} +func (par *Parser) popState() { + par.stack = par.stack[:len(par.stack)-1] +} + +func (par *Parser) stackString() string { + var buf strings.Builder + for _, s := range par.stack { + buf.WriteString(s.String()) + } + return buf.String() +} + +func (par *Parser) StackIsEmpty() bool { + return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == RuneTypeError) +} + +// Reset all Parser state. +func (par *Parser) Reset() { + *par = Parser{ + MaxDepth: par.MaxDepth, + } +} + +// HandleEOF feeds EOF to the Parser. 
The returned RuneType is either +// RuneTypeEOF or RuneTypeError. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. +// +// RuneTypeEOF indicates that a complete JSON document has been read. +func (par *Parser) HandleEOF() (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + defer func() { + par.closed = true + }() + if par.err != nil { + return RuneTypeError, par.err + } + if !par.initialized { + par.initialized = true + par.pushState(RuneTypeError) + } + switch len(par.stack) { + case 0: + return RuneTypeEOF, nil + case 1: + switch { + case par.stack[0].IsNumber(): + if _, err := par.HandleRune('\n'); err == nil { + return RuneTypeEOF, nil + } + case par.stack[0] == RuneTypeError: + par.err = io.EOF + return RuneTypeError, par.err + } + fallthrough + default: + par.err = io.ErrUnexpectedEOF + return RuneTypeError, par.err + } +} + +// HandleRune feeds a Unicode rune to the Parser. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. +// +// RuneTypeEOF indicates that the rune cannot be appended to the JSON +// document; a new JSON document must be started in order to process +// that rune. 
+func (par *Parser) HandleRune(c rune) (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + if par.err != nil { + return RuneTypeError, par.err + } + if !par.initialized { + par.initialized = true + par.pushState(RuneTypeError) + } + if len(par.stack) == 0 { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + default: + return RuneTypeEOF, nil + } + } + switch par.stack[len(par.stack)-1] { + // any ///////////////////////////////////////////////////////////////////////////////////// + case RuneTypeError: + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '{': + if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + return RuneTypeError, ErrParserExceededMaxDepth + } + return par.replaceState(RuneTypeObjectBeg), nil + case '[': + if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + return RuneTypeError, ErrParserExceededMaxDepth + } + return par.replaceState(RuneTypeArrayBeg), nil + case '"': + return par.replaceState(RuneTypeStringBeg), nil + case '-': + return par.replaceState(RuneTypeNumberIntNeg), nil + case '0': + return par.replaceState(RuneTypeNumberIntZero), nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case 't': + return par.replaceState(RuneTypeTrueT), nil + case 'f': + return par.replaceState(RuneTypeFalseF), nil + case 'n': + return par.replaceState(RuneTypeNullN), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c) + } + // object ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeObjectBeg: // waiting for key to start or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '"': + par.replaceState(RuneTypeStringEnd) + return par.pushState(RuneTypeStringBeg), nil + case '}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return 
RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) + } + case RuneTypeStringEnd: // waiting for ':' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ':': + par.replaceState(RuneTypeObjectComma) + par.pushState(RuneTypeError) + return RuneTypeObjectColon, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) + } + case RuneTypeObjectComma: // waiting for ',' or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.replaceState(RuneTypeObjectBeg) + return RuneTypeObjectComma, nil + case '}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c) + } + // array /////////////////////////////////////////////////////////////////////////////////// + case RuneTypeArrayBeg: // waiting for item to start or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + par.replaceState(RuneTypeArrayComma) + par.pushState(RuneTypeError) + return par.HandleRune(c) + } + case RuneTypeArrayEnd: // waiting for item + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + default: + par.replaceState(RuneTypeArrayComma) + par.pushState(RuneTypeError) + return par.HandleRune(c) + } + case RuneTypeArrayComma: // waiting for ',' or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.replaceState(RuneTypeArrayEnd) + return RuneTypeArrayComma, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after array element", c) + } + // string ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeStringBeg: // waiting for char or '"' + switch { + case c == '\\': + return 
par.replaceState(RuneTypeStringEsc), nil + case c == '"': + par.popState() + return RuneTypeStringEnd, nil + case 0x0020 <= c && c <= 0x10FFFF: + return RuneTypeStringChar, nil + default: + return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) + } + case RuneTypeStringEsc: // waiting for escape char + switch c { + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEsc1, nil + case 'u': + return par.replaceState(RuneTypeStringEscU), nil + default: + return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) + } + case RuneTypeStringEscU: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUA), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUA: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUB), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUB: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUC), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUC: + if _, ok := HexToInt(c); ok { + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + // number ////////////////////////////////////////////////////////////////////////////////// + // + // Here's a flattened drawing of the syntax diagram from www.json.org : + // + // [------------ integer ----------][-- fraction ---][-------- exponent -------] + // 
>โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> + // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ + // โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ + // โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ + // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ + // โ”‚ โ”‚ + // โ•ฐโ”€"+"โ”€โ•ฏ + // + // Now here it is slightly redrawn, and with each distinct state our + // parser can be in marked with a single-capital-letter: + // + // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] + // >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> + // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ + // โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ + // โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ + // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ + // โ”‚ โ”‚ + // โ•ฐโ”€"+"โ”€โ•ฏ + // + // You may notice that each of these states may be uniquely identified + // by the last-read RuneType: + // + // A = (nothing yet) + // B = IntNeg + // C = IntZero + // D = IntDig + // E = FracDot + // F = FracDig + // G = ExpE + // H = ExpSign + // I = ExpDig + // + // The 'A' state is part of the RuneTypeError "any" case + // above, and the remainder follow: + case RuneTypeNumberIntNeg: // B + switch c { + case '0': + return par.replaceState(RuneTypeNumberIntZero), 
nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberIntZero: // C + switch c { + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberIntDig: // D + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberFracDot: // E + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberFracDig: // F + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberExpE: // G + switch c { + case '-', '+': + return par.replaceState(RuneTypeNumberExpSign), nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberExpSign: // H + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberExpDig: // I + switch c { + case '0', '1', '2', '3', '4', '5', 
'6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + par.popState() + return par.HandleRune(c) + } + // literals //////////////////////////////////////////////////////////////////////////////// + // true + case RuneTypeTrueT: + return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) + case RuneTypeTrueR: + return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) + case RuneTypeTrueU: + return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) + // false + case RuneTypeFalseF: + return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) + case RuneTypeFalseA: + return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) + case RuneTypeFalseL: + return par.expectRune(c, 's', RuneTypeFalseS, "false", false) + case RuneTypeFalseS: + return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) + // null + case RuneTypeNullN: + return par.expectRune(c, 'u', RuneTypeNullU, "null", false) + case RuneTypeNullU: + return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) + case RuneTypeNullL1: + return par.expectRune(c, 'l', RuneTypeNullL2, "null", true) + default: + panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) + } +} + +func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { + if c != exp { + return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp) + } + if pop { + par.popState() + return typ, nil + } else { + return par.replaceState(typ), nil + } +} diff --git a/misc.go b/misc.go index 89b38a2..4f8e55e 100644 --- a/misc.go +++ b/misc.go @@ -9,22 +9,9 @@ import ( "io" "reflect" "unicode/utf8" -) - -const hex = "0123456789abcdef" -func hex2int[T interface{ byte | rune }](c T) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} + 
"git.lukeshu.com/go/lowmemjson/internal" +) var ( numberType = reflect.TypeOf(json.Number("")) @@ -115,10 +102,10 @@ func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { buf := [6]byte{ '\\', 'u', - hex[(c>>12)&0xf], - hex[(c>>8)&0xf], - hex[(c>>4)&0xf], - hex[(c>>0)&0xf], + internal.Hex[(c>>12)&0xf], + internal.Hex[(c>>8)&0xf], + internal.Hex[(c>>4)&0xf], + internal.Hex[(c>>0)&0xf], } return w.Write(buf[:]) } diff --git a/parse.go b/parse.go deleted file mode 100644 index 3fa6978..0000000 --- a/parse.go +++ /dev/null @@ -1,683 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "fmt" - "io" - iofs "io/fs" - "strings" -) - -type RuneType uint8 - -const ( - RuneTypeError = RuneType(iota) - - RuneTypeSpace // whitespace - - RuneTypeObjectBeg // '{' - RuneTypeObjectColon // ':' - RuneTypeObjectComma // ',' - RuneTypeObjectEnd // '}' - - RuneTypeArrayBeg // '[' - RuneTypeArrayComma // ',' - RuneTypeArrayEnd // ']' - - RuneTypeStringBeg // opening '"' - RuneTypeStringChar // normal character - RuneTypeStringEsc // backslash - RuneTypeStringEsc1 // single-char after a backslash - RuneTypeStringEscU // \uABCD : u - RuneTypeStringEscUA // \uABCD : A - RuneTypeStringEscUB // \uABCD : B - RuneTypeStringEscUC // \uABCD : C - RuneTypeStringEscUD // \uABCD : D - RuneTypeStringEnd // closing '"' - - RuneTypeNumberIntNeg - RuneTypeNumberIntZero - RuneTypeNumberIntDig - RuneTypeNumberFracDot - RuneTypeNumberFracDig - RuneTypeNumberExpE - RuneTypeNumberExpSign - RuneTypeNumberExpDig - - RuneTypeTrueT - RuneTypeTrueR - RuneTypeTrueU - RuneTypeTrueE - - RuneTypeFalseF - RuneTypeFalseA - RuneTypeFalseL - RuneTypeFalseS - RuneTypeFalseE - - RuneTypeNullN - RuneTypeNullU - RuneTypeNullL1 - RuneTypeNullL2 - - RuneTypeEOF -) - -func (t RuneType) GoString() string { - str, ok := map[RuneType]string{ - RuneTypeError: "RuneTypeError", - - RuneTypeSpace: "RuneTypeSpace", - - RuneTypeObjectBeg: 
"RuneTypeObjectBeg", - RuneTypeObjectColon: "RuneTypeObjectColon", - RuneTypeObjectComma: "RuneTypeObjectComma", - RuneTypeObjectEnd: "RuneTypeObjectEnd", - - RuneTypeArrayBeg: "RuneTypeArrayBeg", - RuneTypeArrayComma: "RuneTypeArrayComma", - RuneTypeArrayEnd: "RuneTypeArrayEnd", - - RuneTypeStringBeg: "RuneTypeStringBeg", - RuneTypeStringChar: "RuneTypeStringChar", - RuneTypeStringEsc: "RuneTypeStringEsc", - RuneTypeStringEsc1: "RuneTypeStringEsc1", - RuneTypeStringEscU: "RuneTypeStringEscU", - RuneTypeStringEscUA: "RuneTypeStringEscUA", - RuneTypeStringEscUB: "RuneTypeStringEscUB", - RuneTypeStringEscUC: "RuneTypeStringEscUC", - RuneTypeStringEscUD: "RuneTypeStringEscUD", - RuneTypeStringEnd: "RuneTypeStringEnd", - - RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", - RuneTypeNumberIntZero: "RuneTypeNumberIntZero", - RuneTypeNumberIntDig: "RuneTypeNumberIntDig", - RuneTypeNumberFracDot: "RuneTypeNumberFracDot", - RuneTypeNumberFracDig: "RuneTypeNumberFracDig", - RuneTypeNumberExpE: "RuneTypeNumberExpE", - RuneTypeNumberExpSign: "RuneTypeNumberExpSign", - RuneTypeNumberExpDig: "RuneTypeNumberExpDig", - - RuneTypeTrueT: "RuneTypeTrueT", - RuneTypeTrueR: "RuneTypeTrueR", - RuneTypeTrueU: "RuneTypeTrueU", - RuneTypeTrueE: "RuneTypeTrueE", - - RuneTypeFalseF: "RuneTypeFalseF", - RuneTypeFalseA: "RuneTypeFalseA", - RuneTypeFalseL: "RuneTypeFalseL", - RuneTypeFalseS: "RuneTypeFalseS", - RuneTypeFalseE: "RuneTypeFalseE", - - RuneTypeNullN: "RuneTypeNullN", - RuneTypeNullU: "RuneTypeNullU", - RuneTypeNullL1: "RuneTypeNullL1", - RuneTypeNullL2: "RuneTypeNullL2", - - RuneTypeEOF: "RuneTypeEOF", - }[t] - if ok { - return str - } - return fmt.Sprintf("RuneType(%d)", t) -} - -func (t RuneType) String() string { - str, ok := map[RuneType]string{ - RuneTypeError: "x", - - RuneTypeSpace: " ", - - RuneTypeObjectBeg: "{", - RuneTypeObjectColon: ":", - RuneTypeObjectComma: "o", - RuneTypeObjectEnd: "}", - - RuneTypeArrayBeg: "[", - RuneTypeArrayComma: "a", - RuneTypeArrayEnd: "]", - - 
RuneTypeStringBeg: "“", - RuneTypeStringChar: "c", - RuneTypeStringEsc: "\\", - RuneTypeStringEsc1: "b", - RuneTypeStringEscU: "u", - RuneTypeStringEscUA: "A", - RuneTypeStringEscUB: "B", - RuneTypeStringEscUC: "C", - RuneTypeStringEscUD: "D", - RuneTypeStringEnd: "”", - - RuneTypeNumberIntNeg: "-", - RuneTypeNumberIntZero: "0", - RuneTypeNumberIntDig: "1", - RuneTypeNumberFracDot: ".", - RuneTypeNumberFracDig: "2", - RuneTypeNumberExpE: "e", - RuneTypeNumberExpSign: "+", - RuneTypeNumberExpDig: "3", - - RuneTypeTrueT: "𝕥", // double-struck - RuneTypeTrueR: "𝕣", - RuneTypeTrueU: "𝕦", - RuneTypeTrueE: "𝕖", - - RuneTypeFalseF: "𝔣", // fraktur - RuneTypeFalseA: "𝔞", - RuneTypeFalseL: "𝔩", - RuneTypeFalseS: "𝔰", - RuneTypeFalseE: "𝔢", - - RuneTypeNullN: "ⓝ", // circled - RuneTypeNullU: "ⓤ", - RuneTypeNullL1: "ⓛ", - RuneTypeNullL2: "Ⓛ", // +uppercase - - RuneTypeEOF: "$", - }[t] - if ok { - return str - } - return fmt.Sprintf("<%d>", t) -} - -func (t RuneType) jsonType() string { - return map[RuneType]string{ - RuneTypeObjectBeg: "object", - RuneTypeArrayBeg: "array", - RuneTypeStringBeg: "string", - RuneTypeNumberIntNeg: "number", - RuneTypeNumberIntZero: "number", - RuneTypeNumberIntDig: "number", - RuneTypeTrueT: "true", - RuneTypeFalseF: "false", - RuneTypeNullN: "null", - RuneTypeEOF: "eof", - }[t] -} - -func (t RuneType) IsNumber() bool { - return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig -} - -type Parser struct { - // Setting MaxError to a value greater than 0 causes - // HandleRune to return ErrParserExceededMaxDepth if - // objects/arrays become nested more deeply than this. - MaxDepth int - - initialized bool - - err error - closed bool - - // We reuse RuneTypes to store the stack. The base idea is - // that, stack items are "the most recently read - // stack-relevant RuneType". - // - // We treat RuneTypeError as a wildcard.
- // - // The "normal"stack-relevant RuneTypes are: - // - // “\uABC for strings - // -01.2e+3 for numbers - // 𝕥𝕣𝕦 for "true" - // 𝔣𝔞𝔩𝔰 for "false" - // ⓝⓤⓛ for "null" - // - // Objects and arrays break the "most recently read RuneType" - // rule; they need some special assignments: - // - // { object: waiting for key to start or '}' - // ” object: reading key / waiting for colon - // : object: waiting for value to start - // o object: reading value / waiting for ',' or '}' - // - // [ array: waiting for item to start or ']' - // a array: reading item / waiting for ',' or ']' - // ] array: waiting for item to start - // - // Within each element type, the stack item is replaced, not pushed. - // - // For example, given the input string - // - // {"x":"y","a":"b"} - // - // The stack would be - // - // stack processed - // x - // { { - // ”“ {" - // ”“ {"x - // ” {"x" - // : {"x": - // o“ {"x":" - // o“ {"x":"y - // o {"x":"y" - // { {"x":"y", - // ”“ {"x":"y"," - // ”“ {"x":"y","a - // ” {"x":"y","a" - // : {"x":"y","a": - // o“ {"x":"y","a":" - // o“ {"x":"y","a":"b - // o {"x":"y","a":"b" - // {"x":"y","a":"b"} - stack []RuneType -} - -func (par *Parser) pushState(state RuneType) RuneType { - par.stack = append(par.stack, state) - return state -} -func (par *Parser) replaceState(state RuneType) RuneType { - par.stack[len(par.stack)-1] = state - return state -} -func (par *Parser) popState() { - par.stack = par.stack[:len(par.stack)-1] -} - -func (par *Parser) stackString() string { - var buf strings.Builder - for _, s := range par.stack { - buf.WriteString(s.String()) - } - return buf.String() -} - -// Reset all Parser state. -func (par *Parser) Reset() { - *par = Parser{ - MaxDepth: par.MaxDepth, - } -} - -// HandleEOF feeds EOF to the Parser. The returned RuneType is either -// RuneTypeEOF or RuneTypeError. -// -// An error is returned if and only if the RuneType is RuneTypeError.
-// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. -// -// RuneTypeEOF indicates that a complete JSON document has been read. -func (par *Parser) HandleEOF() (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - defer func() { - par.closed = true - }() - if par.err != nil { - return RuneTypeError, par.err - } - if !par.initialized { - par.initialized = true - par.pushState(RuneTypeError) - } - switch len(par.stack) { - case 0: - return RuneTypeEOF, nil - case 1: - switch { - case par.stack[0].IsNumber(): - if _, err := par.HandleRune('\n'); err == nil { - return RuneTypeEOF, nil - } - case par.stack[0] == RuneTypeError: - par.err = io.EOF - return RuneTypeError, par.err - } - fallthrough - default: - par.err = io.ErrUnexpectedEOF - return RuneTypeError, par.err - } -} - -// HandleRune feeds a Unicode rune to the Parser. -// -// An error is returned if and only if the RuneType is RuneTypeError. -// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. -// -// RuneTypeEOF indicates that the rune cannot be appended to the JSON -// document; a new JSON document must be started in order to process -// that rune. 
-func (par *Parser) HandleRune(c rune) (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - if par.err != nil { - return RuneTypeError, par.err - } - if !par.initialized { - par.initialized = true - par.pushState(RuneTypeError) - } - if len(par.stack) == 0 { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - return RuneTypeEOF, nil - } - } - switch par.stack[len(par.stack)-1] { - // any ///////////////////////////////////////////////////////////////////////////////////// - case RuneTypeError: - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '{': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeObjectBeg), nil - case '[': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeArrayBeg), nil - case '"': - return par.replaceState(RuneTypeStringBeg), nil - case '-': - return par.replaceState(RuneTypeNumberIntNeg), nil - case '0': - return par.replaceState(RuneTypeNumberIntZero), nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case 't': - return par.replaceState(RuneTypeTrueT), nil - case 'f': - return par.replaceState(RuneTypeFalseF), nil - case 'n': - return par.replaceState(RuneTypeNullN), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c) - } - // object ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeObjectBeg: // waiting for key to start or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '"': - par.replaceState(RuneTypeStringEnd) - return par.pushState(RuneTypeStringBeg), nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return 
RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) - } - case RuneTypeStringEnd: // waiting for ':' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ':': - par.replaceState(RuneTypeObjectComma) - par.pushState(RuneTypeError) - return RuneTypeObjectColon, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) - } - case RuneTypeObjectComma: // waiting for ',' or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(RuneTypeObjectBeg) - return RuneTypeObjectComma, nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c) - } - // array /////////////////////////////////////////////////////////////////////////////////// - case RuneTypeArrayBeg: // waiting for item to start or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - par.replaceState(RuneTypeArrayComma) - par.pushState(RuneTypeError) - return par.HandleRune(c) - } - case RuneTypeArrayEnd: // waiting for item - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - par.replaceState(RuneTypeArrayComma) - par.pushState(RuneTypeError) - return par.HandleRune(c) - } - case RuneTypeArrayComma: // waiting for ',' or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(RuneTypeArrayEnd) - return RuneTypeArrayComma, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after array element", c) - } - // string ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeStringBeg: // waiting for char or '"' - switch { - case c == '\\': - return 
par.replaceState(RuneTypeStringEsc), nil - case c == '"': - par.popState() - return RuneTypeStringEnd, nil - case 0x0020 <= c && c <= 0x10FFFF: - return RuneTypeStringChar, nil - default: - return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) - } - case RuneTypeStringEsc: // waiting for escape char - switch c { - case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEsc1, nil - case 'u': - return par.replaceState(RuneTypeStringEscU), nil - default: - return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) - } - case RuneTypeStringEscU: - if _, ok := hex2int(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUA: - if _, ok := hex2int(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUB: - if _, ok := hex2int(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUC: - if _, ok := hex2int(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - // number ////////////////////////////////////////////////////////////////////////////////// - // - // Here's a flattened drawing of the syntax diagram from www.json.org : - // - // [------------ integer ----------][-- fraction ---][-------- exponent -------] - // 
>─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─> - // │ │ │ │ │ │ │ │ │ - // ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯ ╰─"."─╭digit╮─╯ ╰─"e"─╭─╮─────╭─╭digit╮─╯ - // ╰──<──╯ ╰──<──╯ │ │ │ │ ╰──<──╯ - // ╰─"E"─╯ ╰─"-"─╯ - // │ │ - // ╰─"+"─╯ - // - // Now here it is slightly redrawn, and with each distinct state our - // parser can be in marked with a single-capital-letter: - // - // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] - // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─> - // │ │ │ │ │ │ │ │ - // ╰─"-"─B─╯ ╰─digit 1-9─╭─D─╯─digit╮ ╰─"."─E─digit──╭─F─╯─digit╮ ╰─"e"─╭─G─╮─────╭─╭digit─I─╯ - // ╰────<─────╯ ╰────<─────╯ │ │ │ H ╰────<───╯ - // ╰─"E"─╯ ╰─"-"─╯ - // │ │ - // ╰─"+"─╯ - // - // You may notice that each of these states may be uniquely identified - // by the last-read RuneType: - // - // A = (nothing yet) - // B = IntNeg - // C = IntZero - // D = IntDig - // E = FracDot - // F = FracDig - // G = ExpE - // H = ExpSign - // I = ExpDig - // - // The 'A' state is part of the RuneTypeError "any" case - // above, and the remainder follow: - case RuneTypeNumberIntNeg: // B - switch c { - case '0': - return par.replaceState(RuneTypeNumberIntZero),
nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberIntZero: // C - switch c { - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberIntDig: // D - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberFracDot: // E - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberFracDig: // F - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberExpE: // G - switch c { - case '-', '+': - return par.replaceState(RuneTypeNumberExpSign), nil - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpSign: // H - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpDig: // I - switch c { - case '0', '1', '2', '3', '4', '5', 
'6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - par.popState() - return par.HandleRune(c) - } - // literals //////////////////////////////////////////////////////////////////////////////// - // true - case RuneTypeTrueT: - return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) - case RuneTypeTrueR: - return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) - case RuneTypeTrueU: - return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) - // false - case RuneTypeFalseF: - return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) - case RuneTypeFalseA: - return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) - case RuneTypeFalseL: - return par.expectRune(c, 's', RuneTypeFalseS, "false", false) - case RuneTypeFalseS: - return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) - // null - case RuneTypeNullN: - return par.expectRune(c, 'u', RuneTypeNullU, "null", false) - case RuneTypeNullU: - return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) - case RuneTypeNullL1: - return par.expectRune(c, 'l', RuneTypeNullL2, "null", true) - default: - panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) - } -} - -func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { - if c != exp { - return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp) - } - if pop { - par.popState() - return typ, nil - } else { - return par.replaceState(typ), nil - } -} diff --git a/reencode.go b/reencode.go index e7030f8..34c3851 100644 --- a/reencode.go +++ b/reencode.go @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Luke Shumaker +// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later @@ -9,6 +9,8 @@ import ( "fmt" "io" "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal" ) type speculation struct { @@ -67,13 +69,13 @@ type ReEncoder struct { // state: .WriteRune err error - par Parser + par 
internal.Parser written int inputPos int64 // state: .handleRune handleRuneState struct { - lastNonSpace RuneType + lastNonSpace internal.RuneType wasNumber bool curIndent int uhex [4]byte // "\uABCD"-encoded characters in strings @@ -129,7 +131,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if err := enc.handleRune(0, RuneTypeError); err != nil { + if err := enc.handleRune(0, internal.RuneTypeError); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, @@ -163,7 +165,7 @@ rehandle: return enc.written, enc.err } enc.err = enc.handleRune(c, t) - if enc.err == nil && t == RuneTypeEOF { + if enc.err == nil && t == internal.RuneTypeEOF { if enc.AllowMultipleValues { enc.par.Reset() goto rehandle @@ -182,7 +184,7 @@ rehandle: // internal //////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) handleRune(c rune, t RuneType) error { +func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -190,7 +192,7 @@ func (enc *ReEncoder) handleRune(c rune, t RuneType) error { // main if enc.handleRuneState.specu == nil { // not speculating switch t { - case RuneTypeObjectBeg, RuneTypeArrayBeg: // start speculating + case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { return err } @@ -232,7 +234,7 @@ func (enc *ReEncoder) handleRune(c rune, t RuneType) error { return err } enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState - case canCompress && (t == RuneTypeObjectEnd || t == RuneTypeArrayEnd): // stop speculating; use compact + case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.Out); err != nil { return err } @@ -245,7 +247,7 @@ func (enc *ReEncoder) 
handleRune(c rune, t RuneType) error { return nil } -func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t RuneType) error { +func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error { err, shouldHandle := enc.handleRunePre(c, t) if err != nil { return err @@ -258,9 +260,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t RuneType) error { // handle buffered things that need to happen before the new rune // itself is handled. -func (enc *ReEncoder) handleRunePre(c rune, t RuneType) (error, bool) { +func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // emit newlines between top-level values - if enc.handleRuneState.lastNonSpace == RuneTypeEOF { + if enc.handleRuneState.lastNonSpace == internal.RuneTypeEOF { switch { case enc.handleRuneState.wasNumber && t.IsNumber(): if err := enc.emitByte('\n'); err != nil { @@ -275,10 +277,10 @@ func (enc *ReEncoder) handleRunePre(c rune, t RuneType) (error, bool) { // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits - case RuneTypeNumberFracDot: + case internal.RuneTypeNumberFracDot: enc.handleRuneState.fracZeros = 0 - case RuneTypeNumberFracDig: - if c == '0' && enc.handleRuneState.lastNonSpace == RuneTypeNumberFracDig { + case internal.RuneTypeNumberFracDig: + if c == '0' && enc.handleRuneState.lastNonSpace == internal.RuneTypeNumberFracDig { enc.handleRuneState.fracZeros++ return nil, false } @@ -292,9 +294,9 @@ func (enc *ReEncoder) handleRunePre(c rune, t RuneType) (error, bool) { } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits - case RuneTypeNumberExpE, RuneTypeNumberExpSign: + case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign: enc.handleRuneState.expZero = true - case RuneTypeNumberExpDig: + case internal.RuneTypeNumberExpDig: if c == '0' && enc.handleRuneState.expZero { return nil, false } @@ -311,18 +313,18 @@ func (enc *ReEncoder) handleRunePre(c 
rune, t RuneType) (error, bool) { // whitespace switch { case enc.Compact: - if t == RuneTypeSpace { + if t == internal.RuneTypeSpace { return nil, false } case enc.Indent != "": switch t { - case RuneTypeSpace: + case internal.RuneTypeSpace: // let us manage whitespace, don't pass it through return nil, false - case RuneTypeObjectEnd, RuneTypeArrayEnd: + case internal.RuneTypeObjectEnd, internal.RuneTypeArrayEnd: enc.handleRuneState.curIndent-- switch enc.handleRuneState.lastNonSpace { - case RuneTypeObjectBeg, RuneTypeArrayBeg: + case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // collapse default: if err := enc.emitNlIndent(); err != nil { @@ -331,17 +333,17 @@ func (enc *ReEncoder) handleRunePre(c rune, t RuneType) (error, bool) { } default: switch enc.handleRuneState.lastNonSpace { - case RuneTypeObjectBeg, RuneTypeObjectComma, RuneTypeArrayBeg, RuneTypeArrayComma: + case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err, false } - case RuneTypeObjectColon: + case internal.RuneTypeObjectColon: if err := enc.emitByte(' '); err != nil { return err, false } } switch t { - case RuneTypeObjectBeg, RuneTypeArrayBeg: + case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: enc.handleRuneState.curIndent++ } } @@ -351,20 +353,20 @@ func (enc *ReEncoder) handleRunePre(c rune, t RuneType) (error, bool) { } // handle the new rune itself, not buffered things -func (enc *ReEncoder) handleRuneMain(c rune, t RuneType) error { +func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { defer func() { - if t != RuneTypeSpace { + if t != internal.RuneTypeSpace { enc.handleRuneState.lastNonSpace = t } }() switch t { - case RuneTypeStringChar: + case internal.RuneTypeStringChar: return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape)) - case RuneTypeStringEsc, RuneTypeStringEscU: + case 
internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: return nil - case RuneTypeStringEsc1: + case internal.RuneTypeStringEsc1: switch c { case '"': return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape)) @@ -385,17 +387,17 @@ func (enc *ReEncoder) handleRuneMain(c rune, t RuneType) error { default: panic("should not happen") } - case RuneTypeStringEscUA: - enc.handleRuneState.uhex[0], _ = hex2int(c) + case internal.RuneTypeStringEscUA: + enc.handleRuneState.uhex[0], _ = internal.HexToInt(c) return nil - case RuneTypeStringEscUB: - enc.handleRuneState.uhex[1], _ = hex2int(c) + case internal.RuneTypeStringEscUB: + enc.handleRuneState.uhex[1], _ = internal.HexToInt(c) return nil - case RuneTypeStringEscUC: - enc.handleRuneState.uhex[2], _ = hex2int(c) + case internal.RuneTypeStringEscUC: + enc.handleRuneState.uhex[2], _ = internal.HexToInt(c) return nil - case RuneTypeStringEscUD: - enc.handleRuneState.uhex[3], _ = hex2int(c) + case internal.RuneTypeStringEscUD: + enc.handleRuneState.uhex[3], _ = internal.HexToInt(c) c := 0 | rune(enc.handleRuneState.uhex[0])<<12 | rune(enc.handleRuneState.uhex[1])<<8 | @@ -403,16 +405,16 @@ func (enc *ReEncoder) handleRuneMain(c rune, t RuneType) error { rune(enc.handleRuneState.uhex[3])<<0 return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) - case RuneTypeError: // EOF explicitly stated by .Close() + case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough - case RuneTypeEOF: // EOF implied by the start of the next top-level value + case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.handleRuneState.wasNumber = enc.handleRuneState.lastNonSpace.IsNumber() switch { case enc.ForceTrailingNewlines: - t = RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one) + t = internal.RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one) 
return enc.emitByte('\n') default: - t = RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed + t = internal.RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed return nil } default: -- cgit v1.1-4-g5e80