From 2b9473f5e8816eeea76b2fdada184532be00d3a2 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 7 Feb 2023 12:18:29 -0700 Subject: internal: Split in to sub-packages --- compat/json/compat_test.go | 14 +- decode.go | 164 ++-- decode_scan.go | 18 +- decode_scan_test.go | 240 +++--- encode_string.go | 26 +- errors.go | 4 +- internal/allwriter.go | 174 ----- internal/base64.go | 128 ---- internal/base64_test.go | 44 -- internal/base64dec/base64.go | 130 ++++ internal/base64dec/base64_test.go | 44 ++ ...ad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 | 2 + ...5cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a | 2 + ...a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 | 2 + ...3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd | 2 + ...775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 | 2 + ...1b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 | 2 + ...be1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d | 2 + ...f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b | 2 + ...920b5202cd1269174416ce32769c7f59376e76b7dd3129c | 2 + ...7433233f3a90099024e580a6ba319ea2bf539880c50bd7c | 2 + ...326638b8915f80863feab0ba0108183b3093934bdc0420c | 2 + ...aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f | 2 + ...7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 | 2 + ...5bd136874415dddfff5c586e662f21420caa7a94131a56a | 2 + ...d54218d2ad8112204672cc1fb30be297853616788208a5c | 2 + ...eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 | 2 + ...77408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 | 2 + ...1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 | 2 + ...c70baf6d7821a5a6f3a90cabb033575790be91723593680 | 2 + ...9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a | 2 + ...27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 | 2 + ...1a201281dbf6568628b4135c35c811dd9bce97620a75d43 | 2 + internal/borrowed_tags.go | 40 - internal/encode.go | 14 - internal/fastio/allwriter.go | 174 +++++ internal/hex.go | 20 - internal/jsonparse/hex.go | 20 + internal/jsonparse/parse.go | 845 +++++++++++++++++++++ 
internal/jsonparse/parse_test.go | 78 ++ internal/jsontags/borrowed_tags.go | 40 + internal/jsontags/tags.go | 7 + internal/jsontest/jsontest.go | 14 + internal/parse.go | 845 --------------------- internal/parse_test.go | 78 -- internal/tags.go | 7 - ...ad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 | 2 - ...5cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a | 2 - ...a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 | 2 - ...3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd | 2 - ...775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 | 2 - ...1b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 | 2 - ...be1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d | 2 - ...f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b | 2 - ...920b5202cd1269174416ce32769c7f59376e76b7dd3129c | 2 - ...7433233f3a90099024e580a6ba319ea2bf539880c50bd7c | 2 - ...326638b8915f80863feab0ba0108183b3093934bdc0420c | 2 - ...aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f | 2 - ...7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 | 2 - ...5bd136874415dddfff5c586e662f21420caa7a94131a56a | 2 - ...d54218d2ad8112204672cc1fb30be297853616788208a5c | 2 - ...eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 | 2 - ...77408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 | 2 - ...1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 | 2 - ...c70baf6d7821a5a6f3a90cabb033575790be91723593680 | 2 - ...9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a | 2 - ...27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 | 2 - ...1a201281dbf6568628b4135c35c811dd9bce97620a75d43 | 2 - reencode.go | 95 +-- struct.go | 4 +- 70 files changed, 1682 insertions(+), 1673 deletions(-) delete mode 100644 internal/allwriter.go delete mode 100644 internal/base64.go delete mode 100644 internal/base64_test.go create mode 100644 internal/base64dec/base64.go create mode 100644 internal/base64dec/base64_test.go create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 create mode 100644 
internal/base64dec/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c create mode 100644 
internal/base64dec/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 create mode 100644 internal/base64dec/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 delete mode 100644 internal/borrowed_tags.go delete mode 100644 internal/encode.go create mode 100644 internal/fastio/allwriter.go delete mode 100644 internal/hex.go create mode 100644 internal/jsonparse/hex.go create mode 100644 internal/jsonparse/parse.go create mode 100644 internal/jsonparse/parse_test.go create mode 100644 internal/jsontags/borrowed_tags.go create mode 100644 internal/jsontags/tags.go create mode 100644 internal/jsontest/jsontest.go delete mode 100644 internal/parse.go delete mode 100644 internal/parse_test.go delete mode 100644 internal/tags.go delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd 
delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a 
delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 delete mode 100644 internal/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index feb850b..4b167d1 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -9,10 +9,12 @@ import ( "io" "git.lukeshu.com/go/lowmemjson" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsontags" + "git.lukeshu.com/go/lowmemjson/internal/jsontest" ) -var parseTag = internal.ParseTag +var parseTag = jsontags.ParseTag type scanner = lowmemjson.ReEncoderConfig @@ -21,13 +23,13 @@ func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error { } func isValidNumber(s string) bool { - var parser internal.Parser + var parser jsonparse.Parser for _, r := range s { if t, _ := parser.HandleRune(r); !t.IsNumber() { return false } } - if t, _ := parser.HandleEOF(); t == internal.RuneTypeError { + if t, _ := parser.HandleEOF(); t == jsonparse.RuneTypeError { return false } return true @@ -51,9 +53,9 @@ type encodeState struct { } func (es *encodeState) string(str string, _ bool) { - internal.EncodeStringFromString(&es.Buffer, str) + jsontest.EncodeStringFromString(&es.Buffer, str) } func (es *encodeState) stringBytes(str []byte, _ bool) { - internal.EncodeStringFromBytes(&es.Buffer, str) + jsontest.EncodeStringFromBytes(&es.Buffer, str) } diff --git a/decode.go b/decode.go index 8638148..3a9a4b1 100644 --- a/decode.go +++ b/decode.go @@ -23,7 +23,9 @@ import ( "unicode/utf16" "unicode/utf8" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/base64dec" + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // Decodable is the interface implemented by types that can 
decode a @@ -105,7 +107,7 @@ func NewDecoder(r io.RuneScanner) *Decoder { return &Decoder{ io: runeTypeScanner{ inner: r, - parser: internal.Parser{ + parser: jsonparse.Parser{ MaxDepth: maxNestingDepth, }, }, @@ -145,7 +147,7 @@ func (dec *Decoder) More() bool { dec.io.Reset() _, _, t, e := dec.io.ReadRuneType() _ = dec.io.UnreadRune() // best effort - return e == nil && t != internal.RuneTypeEOF + return e == nil && t != jsonparse.RuneTypeEOF } func (dec *Decoder) posStackPush() { @@ -206,7 +208,7 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) { return err } c, s, t, _ := dec.io.ReadRuneType() - if t != internal.RuneTypeEOF { + if t != jsonparse.RuneTypeEOF { panic("should not happen") } if s > 0 { @@ -274,7 +276,7 @@ func (dec *Decoder) panicType(jTyp string, gTyp reflect.Type, err error) { }) } -func (dec *Decoder) readRune() (rune, internal.RuneType) { +func (dec *Decoder) readRune() (rune, jsonparse.RuneType) { c, _, t, e := dec.io.ReadRuneType() if e != nil { panic(decodeError{ @@ -295,20 +297,20 @@ func (dec *Decoder) unreadRune() { } } -func (dec *Decoder) peekRuneType() internal.RuneType { +func (dec *Decoder) peekRuneType() jsonparse.RuneType { _, t := dec.readRune() dec.unreadRune() return t } -func (dec *Decoder) expectRune(ec rune, et internal.RuneType) { +func (dec *Decoder) expectRune(ec rune, et jsonparse.RuneType) { ac, at := dec.readRune() if ac != ec || at != et { panic("should not happen") } } -func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Type) { +func (dec *Decoder) expectRuneType(ec rune, et jsonparse.RuneType, gt reflect.Type) { ac, at := dec.readRune() if ac != ec || at != et { dec.panicType(at.JSONType(), gt, nil) @@ -325,7 +327,7 @@ func (sc *decRuneScanner) ReadRune() (rune, int, error) { return 0, 0, io.EOF } c, s, t, e := sc.dec.io.ReadRuneType() - if t == internal.RuneTypeEOF { + if t == jsonparse.RuneTypeEOF { sc.eof = true sc.dec.io.PopReadBarrier() return 0, 0, io.EOF @@ -412,7 +414,7 @@ 
func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.panicType(t.JSONType(), reflect.PointerTo(typ), err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(textUnmarshalerType): - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -425,13 +427,13 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { default: switch kind := typ.Kind(); kind { case reflect.Bool: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } val.SetBool(dec.decodeBool(typ)) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -443,7 +445,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetInt(n) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -455,7 +457,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetUint(n) case reflect.Float32, reflect.Float64: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -467,7 +469,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } val.SetFloat(n) case reflect.String: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -508,25 +510,25 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { // in the loop) because the only way it's possible is if there's // an interface in there, which'd break 
from the loop on its own. // - // ptr.CanSet() || dec.peekRuneType() != internal.RuneTypeNullN + // ptr.CanSet() || dec.peekRuneType() != jsonparse.RuneTypeNullN // // We only need the pointer itself to be settable if we're // decoding null. - if ptr.Elem() != val && (ptr.CanSet() || dec.peekRuneType() != internal.RuneTypeNullN) { + if ptr.Elem() != val && (ptr.CanSet() || dec.peekRuneType() != jsonparse.RuneTypeNullN) { dec.decode(ptr, false) break } } // Couldn't get type information from a pointer; fall back to untyped mode. switch dec.peekRuneType() { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: val.Set(reflect.ValueOf(dec.decodeAny())) } case reflect.Struct: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -557,7 +559,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { if dec.disallowUnknownFields { dec.panicType("", typ, fmt.Errorf("json: unknown field %q", name)) } - dec.scan(internal.Discard) + dec.scan(fastio.Discard) return } field := index.byPos[idx] @@ -569,7 +571,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", fVal.Type().Elem())) } - if dec.peekRuneType() != internal.RuneTypeNullN { + if dec.peekRuneType() != jsonparse.RuneTypeNullN { if fVal.IsNil() { fVal.Set(reflect.New(fVal.Type().Elem())) } @@ -580,7 +582,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } if field.Quote { switch t := dec.peekRuneType(); t { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() switch fVal.Kind() { // XXX: I can't justify this list, other than "it's what encoding/json @@ -590,7 +592,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { default: // do nothing??? 
} - case internal.RuneTypeStringBeg: + case jsonparse.RuneTypeStringBeg: // TODO: Figure out how to do this without buffering, have correct offsets. var buf bytes.Buffer dec.decodeString(nil, &buf) @@ -612,10 +614,10 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { }) case reflect.Map: switch t := dec.peekRuneType(); t { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case internal.RuneTypeObjectBeg: + case jsonparse.RuneTypeObjectBeg: if val.IsNil() { val.Set(reflect.MakeMap(typ)) } @@ -670,24 +672,24 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } case reflect.Slice: switch { - case typ.Elem().Kind() == reflect.Uint8 && !(dec.peekRuneType() == internal.RuneTypeArrayBeg && (false || + case typ.Elem().Kind() == reflect.Uint8 && !(dec.peekRuneType() == jsonparse.RuneTypeArrayBeg && (false || reflect.PointerTo(typ.Elem()).Implements(decodableType) || reflect.PointerTo(typ.Elem()).Implements(jsonUnmarshalerType) || reflect.PointerTo(typ.Elem()).Implements(textUnmarshalerType))): switch t := dec.peekRuneType(); t { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case internal.RuneTypeStringBeg: + case jsonparse.RuneTypeStringBeg: if typ.Elem() == byteType { var buf bytes.Buffer - dec.decodeString(typ, internal.NewBase64Decoder(&buf)) + dec.decodeString(typ, base64dec.NewBase64Decoder(&buf)) val.Set(reflect.ValueOf(buf.Bytes())) } else { // TODO: Surely there's a better way. At the very least, we should // avoid buffering. 
var buf bytes.Buffer - dec.decodeString(typ, internal.NewBase64Decoder(&buf)) + dec.decodeString(typ, base64dec.NewBase64Decoder(&buf)) bs := buf.Bytes() val.Set(reflect.MakeSlice(typ, len(bs), len(bs))) for i := 0; i < len(bs); i++ { @@ -699,10 +701,10 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } default: switch t := dec.peekRuneType(); t { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) - case internal.RuneTypeArrayBeg: + case jsonparse.RuneTypeArrayBeg: if val.IsNil() { val.Set(reflect.MakeSlice(typ, 0, 0)) } @@ -725,7 +727,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } case reflect.Array: - if nullOK && dec.peekRuneType() == internal.RuneTypeNullN { + if nullOK && dec.peekRuneType() == jsonparse.RuneTypeNullN { dec.decodeNull() return } @@ -741,7 +743,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.decode(mValPtr.Elem(), false) val.Index(i).Set(mValPtr.Elem()) } else { - dec.scan(internal.Discard) + dec.scan(fastio.Discard) } i++ }) @@ -750,7 +752,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } case reflect.Pointer: switch dec.peekRuneType() { - case internal.RuneTypeNullN: + case jsonparse.RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: @@ -765,7 +767,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } -func (dec *Decoder) scan(out internal.RuneWriter) { +func (dec *Decoder) scan(out fastio.RuneWriter) { limiter := dec.limitingScanner() for { c, _, err := limiter.ReadRune() @@ -776,7 +778,7 @@ func (dec *Decoder) scan(out internal.RuneWriter) { } } -func (dec *Decoder) scanNumber(gTyp reflect.Type, out internal.RuneWriter) { +func (dec *Decoder) scanNumber(gTyp reflect.Type, out fastio.RuneWriter) { if t := dec.peekRuneType(); !t.IsNumber() { dec.panicType(t.JSONType(), gTyp, nil) } @@ -901,23 +903,23 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) 
er } func (dec *Decoder) decodeObject(gTyp reflect.Type, decodeKey, decodeVal func()) { - dec.expectRuneType('{', internal.RuneTypeObjectBeg, gTyp) + dec.expectRuneType('{', jsonparse.RuneTypeObjectBeg, gTyp) _, t := dec.readRune() switch t { - case internal.RuneTypeObjectEnd: + case jsonparse.RuneTypeObjectEnd: return - case internal.RuneTypeStringBeg: + case jsonparse.RuneTypeStringBeg: decodeMember: dec.unreadRune() decodeKey() - dec.expectRune(':', internal.RuneTypeObjectColon) + dec.expectRune(':', jsonparse.RuneTypeObjectColon) decodeVal() _, t := dec.readRune() switch t { - case internal.RuneTypeObjectComma: - dec.expectRune('"', internal.RuneTypeStringBeg) + case jsonparse.RuneTypeObjectComma: + dec.expectRune('"', jsonparse.RuneTypeStringBeg) goto decodeMember - case internal.RuneTypeObjectEnd: + case jsonparse.RuneTypeObjectEnd: return default: panic("should not happen") @@ -972,10 +974,10 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e } func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { - dec.expectRuneType('[', internal.RuneTypeArrayBeg, gTyp) + dec.expectRuneType('[', jsonparse.RuneTypeArrayBeg, gTyp) _, t := dec.readRune() switch t { - case internal.RuneTypeArrayEnd: + case jsonparse.RuneTypeArrayEnd: return default: dec.unreadRune() @@ -983,9 +985,9 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { decodeMember() _, t := dec.readRune() switch t { - case internal.RuneTypeArrayComma: + case jsonparse.RuneTypeArrayComma: goto decodeNextMember - case internal.RuneTypeArrayEnd: + case jsonparse.RuneTypeArrayEnd: return default: panic("should not happen") @@ -993,17 +995,17 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { } } -func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { - dec.expectRuneType('"', internal.RuneTypeStringBeg, gTyp) +func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) { + 
dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp) var uhex [4]byte for { c, t := dec.readRune() switch t { - case internal.RuneTypeStringChar: + case jsonparse.RuneTypeStringChar: _, _ = out.WriteRune(c) - case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: + case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing - case internal.RuneTypeStringEsc1: + case jsonparse.RuneTypeStringEsc1: switch c { case '"': _, _ = out.WriteRune('"') @@ -1024,14 +1026,14 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { default: panic("should not happen") } - case internal.RuneTypeStringEscUA: - uhex[0], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUB: - uhex[1], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUC: - uhex[2], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUD: - uhex[3], _ = internal.HexToInt(c) + case jsonparse.RuneTypeStringEscUA: + uhex[0], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUB: + uhex[1], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUC: + uhex[2], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUD: + uhex[3], _ = jsonparse.HexToInt(c) c = 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | @@ -1039,25 +1041,25 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { rune(uhex[3])<<0 handleUnicode: if utf16.IsSurrogate(c) { - if dec.peekRuneType() != internal.RuneTypeStringEsc { + if dec.peekRuneType() != jsonparse.RuneTypeStringEsc { _, _ = out.WriteRune(utf8.RuneError) break } - dec.expectRune('\\', internal.RuneTypeStringEsc) - if dec.peekRuneType() != internal.RuneTypeStringEscU { + dec.expectRune('\\', jsonparse.RuneTypeStringEsc) + if dec.peekRuneType() != jsonparse.RuneTypeStringEscU { _, _ = out.WriteRune(utf8.RuneError) break } - dec.expectRune('u', internal.RuneTypeStringEscU) + dec.expectRune('u', jsonparse.RuneTypeStringEscU) b, _ := dec.readRune() - uhex[0], _ = 
internal.HexToInt(b) + uhex[0], _ = jsonparse.HexToInt(b) b, _ = dec.readRune() - uhex[1], _ = internal.HexToInt(b) + uhex[1], _ = jsonparse.HexToInt(b) b, _ = dec.readRune() - uhex[2], _ = internal.HexToInt(b) + uhex[2], _ = jsonparse.HexToInt(b) b, _ = dec.readRune() - uhex[3], _ = internal.HexToInt(b) + uhex[3], _ = jsonparse.HexToInt(b) c2 := 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | @@ -1073,7 +1075,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { } else { _, _ = out.WriteRune(c) } - case internal.RuneTypeStringEnd: + case jsonparse.RuneTypeStringEnd: return default: panic("should not happen") @@ -1085,15 +1087,15 @@ func (dec *Decoder) decodeBool(gTyp reflect.Type) bool { c, t := dec.readRune() switch c { case 't': - dec.expectRune('r', internal.RuneTypeTrueR) - dec.expectRune('u', internal.RuneTypeTrueU) - dec.expectRune('e', internal.RuneTypeTrueE) + dec.expectRune('r', jsonparse.RuneTypeTrueR) + dec.expectRune('u', jsonparse.RuneTypeTrueU) + dec.expectRune('e', jsonparse.RuneTypeTrueE) return true case 'f': - dec.expectRune('a', internal.RuneTypeFalseA) - dec.expectRune('l', internal.RuneTypeFalseL) - dec.expectRune('s', internal.RuneTypeFalseS) - dec.expectRune('e', internal.RuneTypeFalseE) + dec.expectRune('a', jsonparse.RuneTypeFalseA) + dec.expectRune('l', jsonparse.RuneTypeFalseL) + dec.expectRune('s', jsonparse.RuneTypeFalseS) + dec.expectRune('e', jsonparse.RuneTypeFalseE) return false default: dec.panicType(t.JSONType(), gTyp, nil) @@ -1102,8 +1104,8 @@ func (dec *Decoder) decodeBool(gTyp reflect.Type) bool { } func (dec *Decoder) decodeNull() { - dec.expectRune('n', internal.RuneTypeNullN) - dec.expectRune('u', internal.RuneTypeNullU) - dec.expectRune('l', internal.RuneTypeNullL1) - dec.expectRune('l', internal.RuneTypeNullL2) + dec.expectRune('n', jsonparse.RuneTypeNullN) + dec.expectRune('u', jsonparse.RuneTypeNullU) + dec.expectRune('l', jsonparse.RuneTypeNullL1) + dec.expectRune('l', 
jsonparse.RuneTypeNullL2) } diff --git a/decode_scan.go b/decode_scan.go index 521c5c4..7a52975 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -7,7 +7,7 @@ package lowmemjson import ( "io" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) type runeTypeScanner struct { @@ -16,7 +16,7 @@ type runeTypeScanner struct { inner io.RuneScanner // initialized by constructor - parser internal.Parser // initialized by constructor + parser jsonparse.Parser // initialized by constructor offset int64 initialized bool @@ -24,13 +24,13 @@ type runeTypeScanner struct { rRune rune rSize int - rType internal.RuneType + rType jsonparse.RuneType rErr error } func (sc *runeTypeScanner) Reset() { sc.parser.Reset() - if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { sc.repeat = false // re-figure the rType and rErr var err error @@ -57,9 +57,9 @@ func (sc *runeTypeScanner) Reset() { // end of both value and file: (_, 0, RuneTypeEOF, nil) // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) -func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { +func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { switch { - case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): + case sc.initialized && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF): // do nothing case sc.repeat: _, _, _ = sc.inner.ReadRune() @@ -80,7 +80,7 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) } else { sc.rErr = nil } - if sc.rType == internal.RuneTypeSpace { + if sc.rType == jsonparse.RuneTypeSpace { goto again } case io.EOF: @@ -133,7 +133,7 @@ func (sc 
*runeTypeScanner) PushReadBarrier() { func (sc *runeTypeScanner) PopReadBarrier() { sc.parser.PopBarrier() - if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + if sc.repeat || (sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0) { // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleRune(sc.rRune) @@ -147,7 +147,7 @@ func (sc *runeTypeScanner) PopReadBarrier() { } // tell it to use that rType and rErr _ = sc.UnreadRune() // we set it up to always succeed - } else if sc.rType == internal.RuneTypeEOF { + } else if sc.rType == jsonparse.RuneTypeEOF { // re-figure the rType and rErr var err error sc.rType, err = sc.parser.HandleEOF() diff --git a/decode_scan_test.go b/decode_scan_test.go index d0725e5..1d61157 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -12,13 +12,13 @@ import ( "github.com/stretchr/testify/assert" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) type ReadRuneTypeResult struct { r rune s int - t internal.RuneType + t jsonparse.RuneType e error } @@ -54,149 +54,149 @@ func TestRuneTypeScanner(t *testing.T) { t.Parallel() testcases := map[string]runeTypeScannerTestcase{ "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, 
jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {0, unreadRune, 0, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, 
nil}, }}, "unread2": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {0, unreadRune, 0, nil}, {0, unreadRune, 0, ErrInvalidUnreadRune}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, 
internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, unreadRune, 0, ErrInvalidUnreadRune}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 
1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'0', 1, internal.RuneTypeNumberIntZero, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'0', 1, jsonparse.RuneTypeNumberIntZero, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, }}, "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, + {'{', 1, jsonparse.RuneTypeEOF, nil}, }}, 
"early-eof": {` {`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, }}, "empty": {``, ``, []ReadRuneTypeResult{ - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, }}, "basic2": {`1`, ``, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "fragment": {`1,`, ``, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {',', 1, internal.RuneTypeEOF, nil}, - {',', 1, internal.RuneTypeEOF, nil}, - {',', 1, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeEOF, nil}, }}, "elem": {` { "foo" : 12.0 } `, ``, 
[]ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, {0, pushReadBarrier, 0, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeEOF, nil}, - {'}', 1, internal.RuneTypeEOF, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeEOF, nil}, + {'}', 1, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, } func() { @@ -214,9 +214,9 @@ func TestRuneTypeScanner(t *testing.T) { } tc.Input = `[1,` + tc.Input tc.Exp = append([]ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, {0, pushReadBarrier, 0, nil}, }, tc.Exp...) 
for i := 2; i < len(tc.Exp); i++ { diff --git a/encode_string.go b/encode_string.go index 12f934e..a5d6633 100644 --- a/encode_string.go +++ b/encode_string.go @@ -8,17 +8,19 @@ import ( "io" "unicode/utf8" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsontest" ) func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { buf := [6]byte{ '\\', 'u', - internal.Hex[(c>>12)&0xf], - internal.Hex[(c>>8)&0xf], - internal.Hex[(c>>4)&0xf], - internal.Hex[(c>>0)&0xf], + jsonparse.Hex[(c>>12)&0xf], + jsonparse.Hex[(c>>8)&0xf], + jsonparse.Hex[(c>>4)&0xf], + jsonparse.Hex[(c>>0)&0xf], } return w.Write(buf[:]) } @@ -45,7 +47,7 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { return w.Write(buf[:]) } -func writeStringChar(w internal.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { +func writeStringChar(w fastio.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } @@ -83,7 +85,7 @@ func writeStringChar(w internal.AllWriter, c rune, wasEscaped BackslashEscapeMod } } -func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str string) error { +func encodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error { if err := w.WriteByte('"'); err != nil { return err } @@ -98,7 +100,7 @@ func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str return nil } -func encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str []byte) error { +func encodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error { if err := w.WriteByte('"'); err != nil { return err } @@ -116,13 +118,13 @@ func encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str [ } func init() { - 
internal.EncodeStringFromString = func(w io.Writer, s string) { - if err := encodeStringFromString(internal.NewAllWriter(w), nil, s); err != nil { + jsontest.EncodeStringFromString = func(w io.Writer, s string) { + if err := encodeStringFromString(fastio.NewAllWriter(w), nil, s); err != nil { panic(err) } } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { - if err := encodeStringFromBytes(internal.NewAllWriter(w), nil, s); err != nil { + jsontest.EncodeStringFromBytes = func(w io.Writer, s []byte) { + if err := encodeStringFromBytes(fastio.NewAllWriter(w), nil, s); err != nil { panic(err) } } diff --git a/errors.go b/errors.go index fe48723..0a47db4 100644 --- a/errors.go +++ b/errors.go @@ -11,7 +11,7 @@ import ( "reflect" "strings" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // ErrInvalidUnreadRune is returned to Decodable.DecodeJSON(scanner) @@ -24,7 +24,7 @@ var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune") // ErrParserExceededMaxDepth is the base error that a // *DecodeSyntaxError wraps when the depth of the JSON document // exceeds 10000. -var ErrParserExceededMaxDepth = internal.ErrParserExceededMaxDepth +var ErrParserExceededMaxDepth = jsonparse.ErrParserExceededMaxDepth // low-level decode errors ///////////////////////////////////////////////////////////////////////// // These will be wrapped in a *DecodeError. diff --git a/internal/allwriter.go b/internal/allwriter.go deleted file mode 100644 index 187aa8e..0000000 --- a/internal/allwriter.go +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (C) 2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "io" - "unicode/utf8" -) - -// interfaces ///////////////////////////////////////////////////////////////// - -type RuneWriter interface { - WriteRune(rune) (int, error) -} - -// An AllWriter is the union of several common writer interfaces. 
-type AllWriter interface { - io.Writer - io.ByteWriter - RuneWriter - io.StringWriter -} - -// implementations //////////////////////////////////////////////////////////// - -func WriteByte(w io.Writer, b byte) error { - var buf [1]byte - buf[0] = b - _, err := w.Write(buf[:]) - return err -} - -func WriteRune(w io.Writer, r rune) (int, error) { - var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], r) - return w.Write(buf[:n]) -} - -func WriteString(w io.Writer, s string) (int, error) { - return w.Write([]byte(s)) -} - -// wrappers /////////////////////////////////////////////////////////////////// - -// NNN - -type ( - writerNNN interface{ io.Writer } - writerNNNWrapper struct{ writerNNN } -) - -func (w writerNNNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } -func (w writerNNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } -func (w writerNNNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } - -// NNY - -type ( - writerNNY interface { - io.Writer - io.StringWriter - } - writerNNYWrapper struct{ writerNNY } -) - -func (w writerNNYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } -func (w writerNNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } - -// NYN - -type ( - writerNYN interface { - io.Writer - RuneWriter - } - writerNYNWrapper struct{ writerNYN } -) - -func (w writerNYNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } -func (w writerNYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } - -// NYY - -type ( - writerNYY interface { - io.Writer - RuneWriter - io.StringWriter - } - writerNYYWrapper struct{ writerNYY } -) - -func (w writerNYYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } - -// YNN - -type ( - writerYNN interface { - io.Writer - io.ByteWriter - } - writerYNNWrapper struct{ writerYNN } -) - -func (w writerYNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } -func (w writerYNNWrapper) WriteString(s 
string) (int, error) { return WriteString(w, s) } - -// YNY - -type ( - writerYNY interface { - io.Writer - io.ByteWriter - io.StringWriter - } - writerYNYWrapper struct{ writerYNY } -) - -func (w writerYNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } - -// YYN - -type ( - writerYYN interface { - io.Writer - io.ByteWriter - RuneWriter - } - writerYYNWrapper struct{ writerYYN } -) - -func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } - -// NewAllWriter wraps an io.Writer turning it in to an AllWriter. If -// the io.Writer already has any of the other write methods, then its -// native version of those methods are used. -func NewAllWriter(inner io.Writer) AllWriter { - switch inner := inner.(type) { - // 3 Y bits - case AllWriter: // YYY: - return inner - // 2 Y bits - case writerNYY: - return writerNYYWrapper{writerNYY: inner} - case writerYNY: - return writerYNYWrapper{writerYNY: inner} - case writerYYN: - return writerYYNWrapper{writerYYN: inner} - // 1 Y bit - case writerNNY: - return writerNNYWrapper{writerNNY: inner} - case writerNYN: - return writerNYNWrapper{writerNYN: inner} - case writerYNN: - return writerYNNWrapper{writerYNN: inner} - // 0 Y bits - default: // NNN: - return writerNNNWrapper{writerNNN: inner} - } -} - -// discard ///////////////////////////////////////////////////////////////////// - -// Discard is like io.Discard, but implements AllWriter. 
-var Discard = discard{} - -type discard struct{} - -func (discard) Write(p []byte) (int, error) { return len(p), nil } -func (discard) WriteByte(b byte) error { return nil } -func (discard) WriteRune(r rune) (int, error) { return 0, nil } -func (discard) WriteString(s string) (int, error) { return len(s), nil } diff --git a/internal/base64.go b/internal/base64.go deleted file mode 100644 index 291a229..0000000 --- a/internal/base64.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "encoding/base64" - "io" - "strings" -) - -type base64Decoder struct { - dst io.Writer - - err error - pos int64 - buf [4]byte - bufLen int -} - -func NewBase64Decoder(w io.Writer) interface { - io.WriteCloser - RuneWriter -} { - return &base64Decoder{ - dst: w, - } -} - -func (dec *base64Decoder) decodeByte(b byte) (byte, bool) { - const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - n := strings.IndexByte(alphabet, b) - if n < 0 { - return 0, false - } - dec.pos++ - return byte(n), true -} - -func (dec *base64Decoder) decodeTuple(a, b, c, d byte) error { - var decodedLen int - var encoded [4]byte - var ok bool - - if a != '=' { - encoded[0], ok = dec.decodeByte(a) - if !ok { - return base64.CorruptInputError(dec.pos) - } - decodedLen++ - } - if b != '=' { - encoded[1], ok = dec.decodeByte(b) - if !ok { - return base64.CorruptInputError(dec.pos) - } - // do NOT increment decodedLen here - } - if c != '=' { - encoded[2], ok = dec.decodeByte(c) - if !ok { - return base64.CorruptInputError(dec.pos) - } - decodedLen++ - } - if d != '=' { - encoded[3], ok = dec.decodeByte(d) - if !ok { - return base64.CorruptInputError(dec.pos) - } - decodedLen++ - } - - val := 0 | - uint32(encoded[0])<<18 | - uint32(encoded[1])<<12 | - uint32(encoded[2])<<6 | - uint32(encoded[3])<<0 - var decoded [3]byte - decoded[0] = byte(val >> 16) - decoded[1] = byte(val >> 8) - 
decoded[2] = byte(val >> 0) - - _, err := dec.dst.Write(decoded[:decodedLen]) - return err -} - -func (dec *base64Decoder) Write(dat []byte) (int, error) { - if len(dat) == 0 { - return 0, nil - } - if dec.err != nil { - return 0, dec.err - } - var n int - if dec.bufLen > 0 { - n = copy(dec.buf[dec.bufLen:], dat) - dec.bufLen += n - if dec.bufLen < 4 { - return len(dat), nil - } - if err := dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]); err != nil { - dec.err = err - return 0, dec.err - } - } - for ; n+3 < len(dat); n += 4 { - if err := dec.decodeTuple(dat[n], dat[n+1], dat[n+2], dat[n+3]); err != nil { - dec.err = err - return n, dec.err - } - } - dec.bufLen = copy(dec.buf[:], dat[n:]) - return len(dat), nil -} - -func (dec *base64Decoder) WriteRune(r rune) (int, error) { - return WriteRune(dec, r) -} - -func (dec *base64Decoder) Close() error { - if dec.bufLen == 0 { - return nil - } - copy(dec.buf[:], "====") - return dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]) -} diff --git a/internal/base64_test.go b/internal/base64_test.go deleted file mode 100644 index f18bcd7..0000000 --- a/internal/base64_test.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "bytes" - "encoding/base64" - "testing" - - "github.com/stretchr/testify/require" -) - -func b64encode(t *testing.T, input []byte) []byte { - var encoded bytes.Buffer - enc := base64.NewEncoder(base64.StdEncoding, &encoded) - _, err := enc.Write(input) - require.NoError(t, err) - require.NoError(t, enc.Close()) - return encoded.Bytes() -} - -func b64decode(t *testing.T, input []byte) []byte { - var decoded bytes.Buffer - dec := NewBase64Decoder(&decoded) - _, err := dec.Write(input) - require.NoError(t, err) - require.NoError(t, dec.Close()) - return decoded.Bytes() -} - -func FuzzBase64Decoder(f *testing.F) { - f.Fuzz(func(t *testing.T, input []byte) { - encoded := 
b64encode(t, input) - decoded := b64decode(t, encoded) - t.Logf("input b64 = %q", encoded) - t.Logf("expected decoded = %#v", input) - t.Logf("actual decoded = %#v", decoded) - if !bytes.Equal(input, decoded) { - t.Fail() - } - }) -} diff --git a/internal/base64dec/base64.go b/internal/base64dec/base64.go new file mode 100644 index 0000000..dcb4b1c --- /dev/null +++ b/internal/base64dec/base64.go @@ -0,0 +1,130 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package base64dec + +import ( + "encoding/base64" + "io" + "strings" + + "git.lukeshu.com/go/lowmemjson/internal/fastio" +) + +type base64Decoder struct { + dst io.Writer + + err error + pos int64 + buf [4]byte + bufLen int +} + +func NewBase64Decoder(w io.Writer) interface { + io.WriteCloser + fastio.RuneWriter +} { + return &base64Decoder{ + dst: w, + } +} + +func (dec *base64Decoder) decodeByte(b byte) (byte, bool) { + const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + n := strings.IndexByte(alphabet, b) + if n < 0 { + return 0, false + } + dec.pos++ + return byte(n), true +} + +func (dec *base64Decoder) decodeTuple(a, b, c, d byte) error { + var decodedLen int + var encoded [4]byte + var ok bool + + if a != '=' { + encoded[0], ok = dec.decodeByte(a) + if !ok { + return base64.CorruptInputError(dec.pos) + } + decodedLen++ + } + if b != '=' { + encoded[1], ok = dec.decodeByte(b) + if !ok { + return base64.CorruptInputError(dec.pos) + } + // do NOT increment decodedLen here + } + if c != '=' { + encoded[2], ok = dec.decodeByte(c) + if !ok { + return base64.CorruptInputError(dec.pos) + } + decodedLen++ + } + if d != '=' { + encoded[3], ok = dec.decodeByte(d) + if !ok { + return base64.CorruptInputError(dec.pos) + } + decodedLen++ + } + + val := 0 | + uint32(encoded[0])<<18 | + uint32(encoded[1])<<12 | + uint32(encoded[2])<<6 | + uint32(encoded[3])<<0 + var decoded [3]byte + decoded[0] = byte(val >> 16) + decoded[1] = byte(val 
>> 8) + decoded[2] = byte(val >> 0) + + _, err := dec.dst.Write(decoded[:decodedLen]) + return err +} + +func (dec *base64Decoder) Write(dat []byte) (int, error) { + if len(dat) == 0 { + return 0, nil + } + if dec.err != nil { + return 0, dec.err + } + var n int + if dec.bufLen > 0 { + n = copy(dec.buf[dec.bufLen:], dat) + dec.bufLen += n + if dec.bufLen < 4 { + return len(dat), nil + } + if err := dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]); err != nil { + dec.err = err + return 0, dec.err + } + } + for ; n+3 < len(dat); n += 4 { + if err := dec.decodeTuple(dat[n], dat[n+1], dat[n+2], dat[n+3]); err != nil { + dec.err = err + return n, dec.err + } + } + dec.bufLen = copy(dec.buf[:], dat[n:]) + return len(dat), nil +} + +func (dec *base64Decoder) WriteRune(r rune) (int, error) { + return fastio.WriteRune(dec, r) +} + +func (dec *base64Decoder) Close() error { + if dec.bufLen == 0 { + return nil + } + copy(dec.buf[:], "====") + return dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]) +} diff --git a/internal/base64dec/base64_test.go b/internal/base64dec/base64_test.go new file mode 100644 index 0000000..cb3063d --- /dev/null +++ b/internal/base64dec/base64_test.go @@ -0,0 +1,44 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package base64dec + +import ( + "bytes" + "encoding/base64" + "testing" + + "github.com/stretchr/testify/require" +) + +func b64encode(t *testing.T, input []byte) []byte { + var encoded bytes.Buffer + enc := base64.NewEncoder(base64.StdEncoding, &encoded) + _, err := enc.Write(input) + require.NoError(t, err) + require.NoError(t, enc.Close()) + return encoded.Bytes() +} + +func b64decode(t *testing.T, input []byte) []byte { + var decoded bytes.Buffer + dec := NewBase64Decoder(&decoded) + _, err := dec.Write(input) + require.NoError(t, err) + require.NoError(t, dec.Close()) + return decoded.Bytes() +} + +func FuzzBase64Decoder(f *testing.F) { + f.Fuzz(func(t 
*testing.T, input []byte) { + encoded := b64encode(t, input) + decoded := b64decode(t, encoded) + t.Logf("input b64 = %q", encoded) + t.Logf("expected decoded = %#v", input) + t.Logf("actual decoded = %#v", decoded) + if !bytes.Equal(input, decoded) { + t.Fail() + } + }) +} diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 new file mode 100644 index 0000000..c3774e7 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a new file mode 100644 index 0000000..4c861db --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 new file mode 100644 index 0000000..3d32e14 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd new 
file mode 100644 index 0000000..d08ef92 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 new file mode 100644 index 0000000..a96f559 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 new file mode 100644 index 0000000..87c024d --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0000000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d new file mode 100644 index 0000000..959401e --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b new file mode 100644 
index 0000000..bd1ae59 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c new file mode 100644 index 0000000..09e0ad2 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c new file mode 100644 index 0000000..e8000f3 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c new file mode 100644 index 0000000..aac6b7d --- /dev/null +++ 
b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f new file mode 100644 index 0000000..f3bf6d9 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 new file mode 100644 index 0000000..2e7f462 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a new file mode 100644 index 0000000..c541f52 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c new file mode 100644 index 0000000..5d56f29 --- /dev/null +++ 
b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 new file mode 100644 index 0000000..4b4d59f --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 new file mode 100644 index 0000000..ef9f9d4 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 new file mode 100644 index 0000000..67322c7 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 @@ -0,0 +1,2 @@ +go test fuzz v1 
+[]byte("") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 new file mode 100644 index 0000000..f195330 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\x04000000000000\r00000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a new file mode 100644 index 0000000..5b0d392 --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0000000000000000000000000000000000000000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 new file mode 100644 index 0000000..a389d3c --- /dev/null +++ b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00000") diff --git a/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 new file mode 100644 index 0000000..17d10b2 --- /dev/null +++ 
b/internal/base64dec/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("000000000000") diff --git a/internal/borrowed_tags.go b/internal/borrowed_tags.go deleted file mode 100644 index 6eaf5da..0000000 --- a/internal/borrowed_tags.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -// SPDX-License-Identifier: BSD-3-Clause - -package internal // MODIFIED: changed package name - -import ( - "strings" -) - -// tagOptions is the string following a comma in a struct field's "json" -// tag, or the empty string. It does not include the leading comma. -type tagOptions string - -// parseTag splits a struct field's json tag into its name and -// comma-separated options. -func parseTag(tag string) (string, tagOptions) { - tag, opt, _ := strings.Cut(tag, ",") - return tag, tagOptions(opt) -} - -// Contains reports whether a comma-separated list of options -// contains a particular substr flag. substr must be surrounded by a -// string boundary or commas. 
-func (o tagOptions) Contains(optionName string) bool { - if len(o) == 0 { - return false - } - s := string(o) - for s != "" { - var name string - name, s, _ = strings.Cut(s, ",") - if name == optionName { - return true - } - } - return false -} diff --git a/internal/encode.go b/internal/encode.go deleted file mode 100644 index 8aae673..0000000 --- a/internal/encode.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "io" -) - -var ( - EncodeStringFromBytes func(io.Writer, []byte) - EncodeStringFromString func(io.Writer, string) -) diff --git a/internal/fastio/allwriter.go b/internal/fastio/allwriter.go new file mode 100644 index 0000000..9de8fdc --- /dev/null +++ b/internal/fastio/allwriter.go @@ -0,0 +1,174 @@ +// Copyright (C) 2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package fastio + +import ( + "io" + "unicode/utf8" +) + +// interfaces ///////////////////////////////////////////////////////////////// + +type RuneWriter interface { + WriteRune(rune) (int, error) +} + +// An AllWriter is the union of several common writer interfaces. 
+type AllWriter interface { + io.Writer + io.ByteWriter + RuneWriter + io.StringWriter +} + +// implementations //////////////////////////////////////////////////////////// + +func WriteByte(w io.Writer, b byte) error { + var buf [1]byte + buf[0] = b + _, err := w.Write(buf[:]) + return err +} + +func WriteRune(w io.Writer, r rune) (int, error) { + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], r) + return w.Write(buf[:n]) +} + +func WriteString(w io.Writer, s string) (int, error) { + return w.Write([]byte(s)) +} + +// wrappers /////////////////////////////////////////////////////////////////// + +// NNN + +type ( + writerNNN interface{ io.Writer } + writerNNNWrapper struct{ writerNNN } +) + +func (w writerNNNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerNNNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NNY + +type ( + writerNNY interface { + io.Writer + io.StringWriter + } + writerNNYWrapper struct{ writerNNY } +) + +func (w writerNNYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// NYN + +type ( + writerNYN interface { + io.Writer + RuneWriter + } + writerNYNWrapper struct{ writerNYN } +) + +func (w writerNYNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NYY + +type ( + writerNYY interface { + io.Writer + RuneWriter + io.StringWriter + } + writerNYYWrapper struct{ writerNYY } +) + +func (w writerNYYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } + +// YNN + +type ( + writerYNN interface { + io.Writer + io.ByteWriter + } + writerYNNWrapper struct{ writerYNN } +) + +func (w writerYNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerYNNWrapper) WriteString(s 
string) (int, error) { return WriteString(w, s) } + +// YNY + +type ( + writerYNY interface { + io.Writer + io.ByteWriter + io.StringWriter + } + writerYNYWrapper struct{ writerYNY } +) + +func (w writerYNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// YYN + +type ( + writerYYN interface { + io.Writer + io.ByteWriter + RuneWriter + } + writerYYNWrapper struct{ writerYYN } +) + +func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NewAllWriter wraps an io.Writer turning it in to an AllWriter. If +// the io.Writer already has any of the other write methods, then its +// native version of those methods are used. +func NewAllWriter(inner io.Writer) AllWriter { + switch inner := inner.(type) { + // 3 Y bits + case AllWriter: // YYY: + return inner + // 2 Y bits + case writerNYY: + return writerNYYWrapper{writerNYY: inner} + case writerYNY: + return writerYNYWrapper{writerYNY: inner} + case writerYYN: + return writerYYNWrapper{writerYYN: inner} + // 1 Y bit + case writerNNY: + return writerNNYWrapper{writerNNY: inner} + case writerNYN: + return writerNYNWrapper{writerNYN: inner} + case writerYNN: + return writerYNNWrapper{writerYNN: inner} + // 0 Y bits + default: // NNN: + return writerNNNWrapper{writerNNN: inner} + } +} + +// discard ///////////////////////////////////////////////////////////////////// + +// Discard is like io.Discard, but implements AllWriter. 
+var Discard = discard{} + +type discard struct{} + +func (discard) Write(p []byte) (int, error) { return len(p), nil } +func (discard) WriteByte(b byte) error { return nil } +func (discard) WriteRune(r rune) (int, error) { return 0, nil } +func (discard) WriteString(s string) (int, error) { return len(s), nil } diff --git a/internal/hex.go b/internal/hex.go deleted file mode 100644 index 62a818f..0000000 --- a/internal/hex.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -const Hex = "0123456789abcdef" - -func HexToInt(c rune) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go new file mode 100644 index 0000000..3ed5f01 --- /dev/null +++ b/internal/jsonparse/hex.go @@ -0,0 +1,20 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonparse + +const Hex = "0123456789abcdef" + +func HexToInt(c rune) (byte, bool) { + switch { + case '0' <= c && c <= '9': + return byte(c) - '0', true + case 'a' <= c && c <= 'f': + return byte(c) - 'a' + 10, true + case 'A' <= c && c <= 'F': + return byte(c) - 'A' + 10, true + default: + return 0, false + } +} diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go new file mode 100644 index 0000000..7d97be0 --- /dev/null +++ b/internal/jsonparse/parse.go @@ -0,0 +1,845 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonparse + +import ( + "errors" + "fmt" + "io" + iofs "io/fs" + "strings" +) + +var ErrParserExceededMaxDepth = errors.New("exceeded max depth") + +// RuneType is the classification of a rune when parsing JSON input. 
+// A Parser, rather than grouping runes into tokens and classifying +// tokens, classifies runes directly. +type RuneType uint8 + +const ( + RuneTypeError RuneType = iota + + RuneTypeSpace // whitespace + + RuneTypeObjectBeg // '{' + RuneTypeObjectColon // ':' + RuneTypeObjectComma // ',' + RuneTypeObjectEnd // '}' + + RuneTypeArrayBeg // '[' + RuneTypeArrayComma // ',' + RuneTypeArrayEnd // ']' + + RuneTypeStringBeg // opening '"' + RuneTypeStringChar // normal character + RuneTypeStringEsc // backslash + RuneTypeStringEsc1 // single-char after a backslash + RuneTypeStringEscU // \uABCD : u + RuneTypeStringEscUA // \uABCD : A + RuneTypeStringEscUB // \uABCD : B + RuneTypeStringEscUC // \uABCD : C + RuneTypeStringEscUD // \uABCD : D + RuneTypeStringEnd // closing '"' + + RuneTypeNumberIntNeg + RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero + RuneTypeNumberIntDig + RuneTypeNumberFracDot + RuneTypeNumberFracDig + RuneTypeNumberExpE + RuneTypeNumberExpSign + RuneTypeNumberExpDig + + RuneTypeTrueT + RuneTypeTrueR + RuneTypeTrueU + RuneTypeTrueE + + RuneTypeFalseF + RuneTypeFalseA + RuneTypeFalseL + RuneTypeFalseS + RuneTypeFalseE + + RuneTypeNullN + RuneTypeNullU + RuneTypeNullL1 + RuneTypeNullL2 + + RuneTypeEOF + + // Not a real rune type, but used as a stack state. + runeTypeAny +) + +// GoString implements fmt.GoStringer. +// +//nolint:dupl // False positive due to similarly shaped AST. 
+func (t RuneType) GoString() string { + str, ok := map[RuneType]string{ + RuneTypeError: "RuneTypeError", + + RuneTypeSpace: "RuneTypeSpace", + + RuneTypeObjectBeg: "RuneTypeObjectBeg", + RuneTypeObjectColon: "RuneTypeObjectColon", + RuneTypeObjectComma: "RuneTypeObjectComma", + RuneTypeObjectEnd: "RuneTypeObjectEnd", + + RuneTypeArrayBeg: "RuneTypeArrayBeg", + RuneTypeArrayComma: "RuneTypeArrayComma", + RuneTypeArrayEnd: "RuneTypeArrayEnd", + + RuneTypeStringBeg: "RuneTypeStringBeg", + RuneTypeStringChar: "RuneTypeStringChar", + RuneTypeStringEsc: "RuneTypeStringEsc", + RuneTypeStringEsc1: "RuneTypeStringEsc1", + RuneTypeStringEscU: "RuneTypeStringEscU", + RuneTypeStringEscUA: "RuneTypeStringEscUA", + RuneTypeStringEscUB: "RuneTypeStringEscUB", + RuneTypeStringEscUC: "RuneTypeStringEscUC", + RuneTypeStringEscUD: "RuneTypeStringEscUD", + RuneTypeStringEnd: "RuneTypeStringEnd", + + RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", + RuneTypeNumberIntZero: "RuneTypeNumberIntZero", + RuneTypeNumberIntDig: "RuneTypeNumberIntDig", + RuneTypeNumberFracDot: "RuneTypeNumberFracDot", + RuneTypeNumberFracDig: "RuneTypeNumberFracDig", + RuneTypeNumberExpE: "RuneTypeNumberExpE", + RuneTypeNumberExpSign: "RuneTypeNumberExpSign", + RuneTypeNumberExpDig: "RuneTypeNumberExpDig", + + RuneTypeTrueT: "RuneTypeTrueT", + RuneTypeTrueR: "RuneTypeTrueR", + RuneTypeTrueU: "RuneTypeTrueU", + RuneTypeTrueE: "RuneTypeTrueE", + + RuneTypeFalseF: "RuneTypeFalseF", + RuneTypeFalseA: "RuneTypeFalseA", + RuneTypeFalseL: "RuneTypeFalseL", + RuneTypeFalseS: "RuneTypeFalseS", + RuneTypeFalseE: "RuneTypeFalseE", + + RuneTypeNullN: "RuneTypeNullN", + RuneTypeNullU: "RuneTypeNullU", + RuneTypeNullL1: "RuneTypeNullL1", + RuneTypeNullL2: "RuneTypeNullL2", + + RuneTypeEOF: "RuneTypeEOF", + + runeTypeAny: "runeTypeAny", + }[t] + if ok { + return str + } + return fmt.Sprintf("RuneType(%d)", t) +} + +// String implements fmt.Stringer. +// +//nolint:dupl // False positive due to similarly shaped AST. 
+func (t RuneType) String() string { + str, ok := map[RuneType]string{ + RuneTypeError: "x", + + RuneTypeSpace: " ", + + RuneTypeObjectBeg: "{", + RuneTypeObjectColon: ":", + RuneTypeObjectComma: "o", + RuneTypeObjectEnd: "}", + + RuneTypeArrayBeg: "[", + RuneTypeArrayComma: "a", + RuneTypeArrayEnd: "]", + + RuneTypeStringBeg: "\"", + RuneTypeStringChar: "c", + RuneTypeStringEsc: "\\", + RuneTypeStringEsc1: "b", + RuneTypeStringEscU: "u", + RuneTypeStringEscUA: "A", + RuneTypeStringEscUB: "B", + RuneTypeStringEscUC: "C", + RuneTypeStringEscUD: "D", + RuneTypeStringEnd: "»", + + RuneTypeNumberIntNeg: "-", + RuneTypeNumberIntZero: "0", + RuneTypeNumberIntDig: "1", + RuneTypeNumberFracDot: ".", + RuneTypeNumberFracDig: "2", + RuneTypeNumberExpE: "e", + RuneTypeNumberExpSign: "+", + RuneTypeNumberExpDig: "3", + + RuneTypeTrueT: "𝕥", // double-struck + RuneTypeTrueR: "𝕣", + RuneTypeTrueU: "𝕦", + RuneTypeTrueE: "𝕖", + + RuneTypeFalseF: "𝔣", // fraktur + RuneTypeFalseA: "𝔞", + RuneTypeFalseL: "𝔩", + RuneTypeFalseS: "𝔰", + RuneTypeFalseE: "𝔢", + + RuneTypeNullN: "ⓝ", // circled + RuneTypeNullU: "ⓤ", + RuneTypeNullL1: "ⓛ", + RuneTypeNullL2: "Ⓛ", // +uppercase + + RuneTypeEOF: "$", + + runeTypeAny: "?", + }[t] + if ok { + return str + } + return fmt.Sprintf("<%d>", t) +} + +func (t RuneType) JSONType() string { + return map[RuneType]string{ + RuneTypeObjectBeg: "object", + RuneTypeArrayBeg: "array", + RuneTypeStringBeg: "string", + RuneTypeNumberIntNeg: "number", + RuneTypeNumberIntZero: "number", + RuneTypeNumberIntDig: "number", + RuneTypeTrueT: "true", + RuneTypeFalseF: "false", + RuneTypeNullN: "null", + RuneTypeEOF: "eof", + }[t] +} + +// IsNumber returns whether the RuneType is one of the +// RuneTypeNumberXXX values. +func (t RuneType) IsNumber() bool { + return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig +} + +// Parser is the low-level JSON parser that powers both *Decoder and +// *ReEncoder. 
+type Parser struct { + // Setting MaxDepth to a value greater than 0 causes + // HandleRune to return ErrParserExceededMaxDepth if + // objects/arrays become nested more deeply than this. + MaxDepth int + + initialized bool + + err error + closed bool + + // We reuse RuneTypes to store the stack. The base idea is: + // stack items are "the most recently read stack-relevant + // RuneType". + // + // The stack starts out with the special pseudo-RuneType + // `runeTypeAny` that means we're willing to accept any + // element type; an empty stack means that we have reached the + // end of the top-level element and should accept no more + // input except for whitespace. + // + // The "normal" stack-relevant RuneTypes are: + // + // "\uABC for strings + // -01.2e+3 for numbers + // 𝕥𝕣𝕦 for "true" + // 𝔣𝔞𝔩𝔰 for "false" + // ⓝⓤⓛ for "null" + // + // Objects and arrays break the "most recently read RuneType" + // rule; they need some special assignments: + // + // { object: waiting for key to start or '}' + // » object: reading key / waiting for colon + // o object: reading value / waiting for ',' or '}' + // + // [ array: waiting for item to start or ']' + // a array: reading item / waiting for ',' or ']' + // + // Within each element type, the stack item is replaced, not pushed. + // + // (Keep each of these examples in-sync with parse_test.go.) + // + // For example, given the input string + // + // {"x":"y","a":"b"} + // + // The stack would be + // + // stack processed + // ? + // { { + // »" {" + // »" {"x + // » {"x" + // o? {"x": + // o" {"x":" + // o" {"x":"y + // o {"x":"y" + // { {"x":"y", + // »" {"x":"y"," + // »" {"x":"y","a + // » {"x":"y","a" + // o? {"x":"y","a": + // o" {"x":"y","a":" + // o" {"x":"y","a":"b + // o {"x":"y","a":"b" + // {"x":"y","a":"b"} + // + // Or, given the input string + // + // ["x","y"] + // + // The stack would be + // + // stack processed + // ? + // [ [ + // a" [" + // a" ["x + // a ["x" + // a? 
["x", + // a" ["x"," + // a" ["x","y + // a ["x","y" + // ["x","y"] + stack []RuneType + + barriers []barrier +} + +type barrier struct { + closed bool + stack []RuneType +} + +func (par *Parser) init() { + if !par.initialized { + par.initialized = true + par.pushState(runeTypeAny) + } +} + +func (par *Parser) pushState(state RuneType) RuneType { + par.stack = append(par.stack, state) + return state +} + +func (par *Parser) replaceState(state RuneType) RuneType { + par.stack[len(par.stack)-1] = state + return state +} + +func (par *Parser) popState() { + par.stack = par.stack[:len(par.stack)-1] +} + +func (par *Parser) stackString() string { + par.init() + var buf strings.Builder + for _, s := range par.stack { + buf.WriteString(s.String()) + } + return buf.String() +} + +func (par *Parser) depth() int { + n := len(par.stack) + for _, barrier := range par.barriers { + n += len(barrier.stack) + } + return n +} + +func (par *Parser) StackIsEmpty() bool { + if len(par.barriers) > 0 { + return false + } + if len(par.stack) == 0 { + return true + } + return len(par.stack) == 1 && par.stack[0] == runeTypeAny +} + +func (par *Parser) StackSize() int { + return len(par.stack) +} + +// Reset all Parser state. +func (par *Parser) Reset() { + *par = Parser{ + MaxDepth: par.MaxDepth, + } +} + +// PushReadBarrier causes the parser to expect EOF once the end of the +// element that is started by the current top-of-stack is reached, +// until this is un-done with PopBarrier. It essentially turns the +// parser in to a sub-parser. +// +// PushReadBarrier may only be called at the beginning of an element, +// whether that be +// +// - runeTypeAny +// - RuneTypeObjectBeg +// - RuneTypeArrayBeg +// - RuneTypeStringBeg +// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig +// - RuneTypeTrueT +// - RuneTypeFalseF +// - RuneTypeNullN +func (par *Parser) PushReadBarrier() { + // Sanity checking. 
+ par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushReadBarrier call: empty stack")) + } + curState := par.stack[len(par.stack)-1] + switch curState { + case runeTypeAny, + RuneTypeObjectBeg, + RuneTypeArrayBeg, + RuneTypeStringBeg, + RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, + RuneTypeTrueT, + RuneTypeFalseF, + RuneTypeNullN: + // OK + default: + panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) + } + // Actually push. + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack[:len(par.stack)-1], + }) + par.stack = []RuneType{curState} +} + +// PushWriteBarrier causes the parser to expect EOF once the end of +// the about-to-start element is reached, until this is un-done with +// PopBarrier. It essentially turns the parser in to a sub-parser. +// +// PushWriteBarrier may only be called at the places where an element +// of any type may start: +// +// - runeTypeAny for top-level and object-value elements +// - RuneTypeArrayBeg for array-item elements +// +// PushWriteBarrier signals intent to write an element; if it is +// called in a place where an element is optional (at the beginning of +// an array), it becomes a syntax error to not write the element. +func (par *Parser) PushWriteBarrier() { + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushWriteBarrier call: empty stack")) + } + switch par.stack[len(par.stack)-1] { + case runeTypeAny: + par.popState() + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + case RuneTypeArrayBeg: + par.replaceState(RuneTypeArrayComma) + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + default: + panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) + } +} + +// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. 
+func (par *Parser) PopBarrier() { + if len(par.barriers) == 0 { + panic(errors.New("illegal PopBarrier call: empty barrier stack")) + } + barrier := par.barriers[len(par.barriers)-1] + par.barriers = par.barriers[:len(par.barriers)-1] + par.closed = barrier.closed + par.stack = append(barrier.stack, par.stack...) +} + +// HandleEOF feeds EOF to the Parser. The returned RuneType is either +// RuneTypeEOF or RuneTypeError. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. +// +// RuneTypeEOF indicates that a complete JSON document has been read. +func (par *Parser) HandleEOF() (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + defer func() { + par.closed = true + }() + if par.err != nil { + return RuneTypeError, par.err + } + par.init() + switch len(par.stack) { + case 0: + return RuneTypeEOF, nil + case 1: + switch { + case par.stack[0].IsNumber(): + if _, err := par.HandleRune('\n'); err == nil { + return RuneTypeEOF, nil + } + case par.stack[0] == runeTypeAny: + par.err = io.EOF + return RuneTypeError, par.err + } + fallthrough + default: + par.err = io.ErrUnexpectedEOF + return RuneTypeError, par.err + } +} + +// HandleRune feeds a Unicode rune to the Parser. +// +// An error is returned if and only if the RuneType is RuneTypeError. +// Returns io/fs.ErrClosed if .HandleEOF() has previously been called +// (and .Reset() has not been called since). +// +// Once RuneTypeError or RuneTypeEOF has been returned, it will keep +// being returned from both .HandleRune(c) and .HandleEOF() until +// .Reset() is called. 
+// +// RuneTypeEOF indicates that the rune cannot be appended to the JSON +// document; a new JSON document must be started in order to process +// that rune. +func (par *Parser) HandleRune(c rune) (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed + } + if par.err != nil { + return RuneTypeError, par.err + } + par.init() + if len(par.stack) == 0 { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + default: + return RuneTypeEOF, nil + } + } + switch par.stack[len(par.stack)-1] { + // any ///////////////////////////////////////////////////////////////////////////////////// + case runeTypeAny: + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '{': + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { + return RuneTypeError, ErrParserExceededMaxDepth + } + return par.replaceState(RuneTypeObjectBeg), nil + case '[': + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { + return RuneTypeError, ErrParserExceededMaxDepth + } + return par.replaceState(RuneTypeArrayBeg), nil + case '"': + return par.replaceState(RuneTypeStringBeg), nil + case '-': + return par.replaceState(RuneTypeNumberIntNeg), nil + case '0': + return par.replaceState(RuneTypeNumberIntZero), nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case 't': + return par.replaceState(RuneTypeTrueT), nil + case 'f': + return par.replaceState(RuneTypeFalseF), nil + case 'n': + return par.replaceState(RuneTypeNullN), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c) + } + // object ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeObjectBeg: // waiting for key to start or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case '"': + par.replaceState(RuneTypeStringEnd) + return par.pushState(RuneTypeStringBeg), nil + case 
'}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) + } + case RuneTypeStringEnd: // waiting for ':' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ':': + par.replaceState(RuneTypeObjectComma) + par.pushState(runeTypeAny) + return RuneTypeObjectColon, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) + } + case RuneTypeObjectComma: // waiting for ',' or '}' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.replaceState(RuneTypeObjectBeg) + return RuneTypeObjectComma, nil + case '}': + par.popState() + return RuneTypeObjectEnd, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c) + } + // array /////////////////////////////////////////////////////////////////////////////////// + case RuneTypeArrayBeg: // waiting for item to start or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + par.replaceState(RuneTypeArrayComma) + par.pushState(runeTypeAny) + return par.HandleRune(c) + } + case RuneTypeArrayComma: // waiting for ',' or ']' + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil + case ',': + par.pushState(runeTypeAny) + return RuneTypeArrayComma, nil + case ']': + par.popState() + return RuneTypeArrayEnd, nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q after array element", c) + } + // string ////////////////////////////////////////////////////////////////////////////////// + case RuneTypeStringBeg: // waiting for char or '"' + switch { + case c == '\\': + return par.replaceState(RuneTypeStringEsc), nil + case c == '"': + par.popState() + return RuneTypeStringEnd, nil + case 0x0020 <= c && c <= 0x10FFFF: + return RuneTypeStringChar, nil + 
default: + return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) + } + case RuneTypeStringEsc: // waiting for escape char + switch c { + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEsc1, nil + case 'u': + return par.replaceState(RuneTypeStringEscU), nil + default: + return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) + } + case RuneTypeStringEscU: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUA), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUA: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUB), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUB: + if _, ok := HexToInt(c); ok { + return par.replaceState(RuneTypeStringEscUC), nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + case RuneTypeStringEscUC: + if _, ok := HexToInt(c); ok { + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil + } else { + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) + } + // number ////////////////////////////////////////////////////////////////////////////////// + // + // Here's a flattened drawing of the syntax diagram from www.json.org : + // + // [------------ integer ----------][-- fraction ---][-------- exponent -------] + // >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─> + // │ │ │ │ │ │ │ │ │ + // ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯ ╰─"."─╭digit╮─╯ ╰─"e"─╭─╮─────╭─╭digit╮─╯ + // ╰──<──╯ ╰──<──╯ │ │ │ │ ╰──<──╯ + // ╰─"E"─╯ ╰─"-"─╯ + // │ │ + // ╰─"+"─╯ + // + // Now here it is slightly redrawn, and with each distinct state our + // parser can be in marked with a single-capital-letter: + 
// + // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] + // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─> + // │ │ │ │ │ │ │ │ + // ╰─"-"─B─╯ ╰─digit 1-9─╭─D─╯─digit╮ ╰─"."─E─digit──╭─F─╯─digit╮ ╰─"e"─╭─G─╮─────╭─╭digit─I─╯ + // ╰────<─────╯ ╰────<─────╯ │ │ │ H ╰────<───╯ + // ╰─"E"─╯ ╰─"-"─╯ + // │ │ + // ╰─"+"─╯ + // + // You may notice that each of these states may be uniquely identified + // by the last-read RuneType: + // + // A = (nothing yet) + // B = IntNeg + // C = IntZero + // D = IntDig + // E = FracDot + // F = FracDig + // G = ExpE + // H = ExpSign + // I = ExpDig + // + // The 'A' state is part of the runeTypeAny case above, and + // the remainder follow: + case RuneTypeNumberIntNeg: // B + switch c { + case '0': + return par.replaceState(RuneTypeNumberIntZero), nil + case '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberIntZero: // C + switch c { + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberIntDig: // D + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberIntDig), nil + case '.': + return par.replaceState(RuneTypeNumberFracDot), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberFracDot: // E + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberFracDig: // F + switch c { + case 
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberFracDig), nil + case 'e', 'E': + return par.replaceState(RuneTypeNumberExpE), nil + default: + par.popState() + return par.HandleRune(c) + } + case RuneTypeNumberExpE: // G + switch c { + case '-', '+': + return par.replaceState(RuneTypeNumberExpSign), nil + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberExpSign: // H + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) + } + case RuneTypeNumberExpDig: // I + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return par.replaceState(RuneTypeNumberExpDig), nil + default: + par.popState() + return par.HandleRune(c) + } + // literals //////////////////////////////////////////////////////////////////////////////// + // true + case RuneTypeTrueT: + return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) + case RuneTypeTrueR: + return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) + case RuneTypeTrueU: + return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) + // false + case RuneTypeFalseF: + return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) + case RuneTypeFalseA: + return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) + case RuneTypeFalseL: + return par.expectRune(c, 's', RuneTypeFalseS, "false", false) + case RuneTypeFalseS: + return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) + // null + case RuneTypeNullN: + return par.expectRune(c, 'u', RuneTypeNullU, "null", false) + case RuneTypeNullU: + return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) + case RuneTypeNullL1: + return par.expectRune(c, 'l', RuneTypeNullL2, "null", 
true) + default: + panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) + } +} + +func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { + if c != exp { + return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp) + } + if pop { + par.popState() + return typ, nil + } else { + return par.replaceState(typ), nil + } +} diff --git a/internal/jsonparse/parse_test.go b/internal/jsonparse/parse_test.go new file mode 100644 index 0000000..e531daf --- /dev/null +++ b/internal/jsonparse/parse_test.go @@ -0,0 +1,78 @@ +// Copyright (C) 2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsonparse + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParserHandleRune(t *testing.T) { + t.Parallel() + type testcase struct { + Input string + ExpStack []string + } + testcases := map[string]testcase{ + // Keep these test-cases in-sync with the examples in parse.go. 
+ "object": { + Input: `{"x":"y","a":"b"}`, + ExpStack: []string{ + // st,// processed + `?`, + `{`, // { + `»"`, // {" + `»"`, // {"x + `»`, // {"x" + `o?`, // {"x": + `o"`, // {"x":" + `o"`, // {"x":"y + `o`, // {"x":"y" + `{`, // {"x":"y", + `»"`, // {"x":"y"," + `»"`, // {"x":"y","a + `»`, // {"x":"y","a" + `o?`, // {"x":"y","a": + `o"`, // {"x":"y","a":" + `o"`, // {"x":"y","a":"b + `o`, // {"x":"y","a":"b" + ``, // {"x":"y","a":"b"} + }, + }, + "array": { + Input: `["x","y"]`, + ExpStack: []string{ + // st,// processed + `?`, + `[`, // [ + `a"`, // [" + `a"`, // ["x + `a`, // ["x" + `a?`, // ["x", + `a"`, // ["x"," + `a"`, // ["x","y + `a`, // ["x","y" + ``, // ["x","y"] + }, + }, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + var par Parser + if !assert.Equal(t, len(tc.Input)+1, len(tc.ExpStack)) { + return + } + for i, r := range tc.Input { + assert.Equal(t, tc.ExpStack[i], par.stackString()) + _, err := par.HandleRune(r) + assert.NoError(t, err) + assert.Equal(t, tc.ExpStack[i+1], par.stackString()) + } + }) + } +} diff --git a/internal/jsontags/borrowed_tags.go b/internal/jsontags/borrowed_tags.go new file mode 100644 index 0000000..aa94b9b --- /dev/null +++ b/internal/jsontags/borrowed_tags.go @@ -0,0 +1,40 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// SPDX-License-Identifier: BSD-3-Clause + +package jsontags // MODIFIED: changed package name + +import ( + "strings" +) + +// tagOptions is the string following a comma in a struct field's "json" +// tag, or the empty string. It does not include the leading comma. +type tagOptions string + +// parseTag splits a struct field's json tag into its name and +// comma-separated options. 
+func parseTag(tag string) (string, tagOptions) { + tag, opt, _ := strings.Cut(tag, ",") + return tag, tagOptions(opt) +} + +// Contains reports whether a comma-separated list of options +// contains a particular substr flag. substr must be surrounded by a +// string boundary or commas. +func (o tagOptions) Contains(optionName string) bool { + if len(o) == 0 { + return false + } + s := string(o) + for s != "" { + var name string + name, s, _ = strings.Cut(s, ",") + if name == optionName { + return true + } + } + return false +} diff --git a/internal/jsontags/tags.go b/internal/jsontags/tags.go new file mode 100644 index 0000000..386824d --- /dev/null +++ b/internal/jsontags/tags.go @@ -0,0 +1,7 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsontags + +var ParseTag = parseTag diff --git a/internal/jsontest/jsontest.go b/internal/jsontest/jsontest.go new file mode 100644 index 0000000..fbc775d --- /dev/null +++ b/internal/jsontest/jsontest.go @@ -0,0 +1,14 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package jsontest + +import ( + "io" +) + +var ( + EncodeStringFromBytes func(io.Writer, []byte) + EncodeStringFromString func(io.Writer, string) +) diff --git a/internal/parse.go b/internal/parse.go deleted file mode 100644 index 36db4a9..0000000 --- a/internal/parse.go +++ /dev/null @@ -1,845 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "errors" - "fmt" - "io" - iofs "io/fs" - "strings" -) - -var ErrParserExceededMaxDepth = errors.New("exceeded max depth") - -// RuneType is the classification of a rune when parsing JSON input. -// A Parser, rather than grouping runes into tokens and classifying -// tokens, classifies runes directly. 
-type RuneType uint8 - -const ( - RuneTypeError RuneType = iota - - RuneTypeSpace // whitespace - - RuneTypeObjectBeg // '{' - RuneTypeObjectColon // ':' - RuneTypeObjectComma // ',' - RuneTypeObjectEnd // '}' - - RuneTypeArrayBeg // '[' - RuneTypeArrayComma // ',' - RuneTypeArrayEnd // ']' - - RuneTypeStringBeg // opening '"' - RuneTypeStringChar // normal character - RuneTypeStringEsc // backslash - RuneTypeStringEsc1 // single-char after a backslash - RuneTypeStringEscU // \uABCD : u - RuneTypeStringEscUA // \uABCD : A - RuneTypeStringEscUB // \uABCD : B - RuneTypeStringEscUC // \uABCD : C - RuneTypeStringEscUD // \uABCD : D - RuneTypeStringEnd // closing '"' - - RuneTypeNumberIntNeg - RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero - RuneTypeNumberIntDig - RuneTypeNumberFracDot - RuneTypeNumberFracDig - RuneTypeNumberExpE - RuneTypeNumberExpSign - RuneTypeNumberExpDig - - RuneTypeTrueT - RuneTypeTrueR - RuneTypeTrueU - RuneTypeTrueE - - RuneTypeFalseF - RuneTypeFalseA - RuneTypeFalseL - RuneTypeFalseS - RuneTypeFalseE - - RuneTypeNullN - RuneTypeNullU - RuneTypeNullL1 - RuneTypeNullL2 - - RuneTypeEOF - - // Not a real rune type, but used as a stack state. - runeTypeAny -) - -// GoString implements fmt.GoStringer. -// -//nolint:dupl // False positive due to similarly shaped AST. 
-func (t RuneType) GoString() string { - str, ok := map[RuneType]string{ - RuneTypeError: "RuneTypeError", - - RuneTypeSpace: "RuneTypeSpace", - - RuneTypeObjectBeg: "RuneTypeObjectBeg", - RuneTypeObjectColon: "RuneTypeObjectColon", - RuneTypeObjectComma: "RuneTypeObjectComma", - RuneTypeObjectEnd: "RuneTypeObjectEnd", - - RuneTypeArrayBeg: "RuneTypeArrayBeg", - RuneTypeArrayComma: "RuneTypeArrayComma", - RuneTypeArrayEnd: "RuneTypeArrayEnd", - - RuneTypeStringBeg: "RuneTypeStringBeg", - RuneTypeStringChar: "RuneTypeStringChar", - RuneTypeStringEsc: "RuneTypeStringEsc", - RuneTypeStringEsc1: "RuneTypeStringEsc1", - RuneTypeStringEscU: "RuneTypeStringEscU", - RuneTypeStringEscUA: "RuneTypeStringEscUA", - RuneTypeStringEscUB: "RuneTypeStringEscUB", - RuneTypeStringEscUC: "RuneTypeStringEscUC", - RuneTypeStringEscUD: "RuneTypeStringEscUD", - RuneTypeStringEnd: "RuneTypeStringEnd", - - RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", - RuneTypeNumberIntZero: "RuneTypeNumberIntZero", - RuneTypeNumberIntDig: "RuneTypeNumberIntDig", - RuneTypeNumberFracDot: "RuneTypeNumberFracDot", - RuneTypeNumberFracDig: "RuneTypeNumberFracDig", - RuneTypeNumberExpE: "RuneTypeNumberExpE", - RuneTypeNumberExpSign: "RuneTypeNumberExpSign", - RuneTypeNumberExpDig: "RuneTypeNumberExpDig", - - RuneTypeTrueT: "RuneTypeTrueT", - RuneTypeTrueR: "RuneTypeTrueR", - RuneTypeTrueU: "RuneTypeTrueU", - RuneTypeTrueE: "RuneTypeTrueE", - - RuneTypeFalseF: "RuneTypeFalseF", - RuneTypeFalseA: "RuneTypeFalseA", - RuneTypeFalseL: "RuneTypeFalseL", - RuneTypeFalseS: "RuneTypeFalseS", - RuneTypeFalseE: "RuneTypeFalseE", - - RuneTypeNullN: "RuneTypeNullN", - RuneTypeNullU: "RuneTypeNullU", - RuneTypeNullL1: "RuneTypeNullL1", - RuneTypeNullL2: "RuneTypeNullL2", - - RuneTypeEOF: "RuneTypeEOF", - - runeTypeAny: "runeTypeAny", - }[t] - if ok { - return str - } - return fmt.Sprintf("RuneType(%d)", t) -} - -// String implements fmt.Stringer. -// -//nolint:dupl // False positive due to similarly shaped AST. 
-func (t RuneType) String() string { - str, ok := map[RuneType]string{ - RuneTypeError: "x", - - RuneTypeSpace: " ", - - RuneTypeObjectBeg: "{", - RuneTypeObjectColon: ":", - RuneTypeObjectComma: "o", - RuneTypeObjectEnd: "}", - - RuneTypeArrayBeg: "[", - RuneTypeArrayComma: "a", - RuneTypeArrayEnd: "]", - - RuneTypeStringBeg: "\"", - RuneTypeStringChar: "c", - RuneTypeStringEsc: "\\", - RuneTypeStringEsc1: "b", - RuneTypeStringEscU: "u", - RuneTypeStringEscUA: "A", - RuneTypeStringEscUB: "B", - RuneTypeStringEscUC: "C", - RuneTypeStringEscUD: "D", - RuneTypeStringEnd: "»", - - RuneTypeNumberIntNeg: "-", - RuneTypeNumberIntZero: "0", - RuneTypeNumberIntDig: "1", - RuneTypeNumberFracDot: ".", - RuneTypeNumberFracDig: "2", - RuneTypeNumberExpE: "e", - RuneTypeNumberExpSign: "+", - RuneTypeNumberExpDig: "3", - - RuneTypeTrueT: "𝕥", // double-struck - RuneTypeTrueR: "𝕣", - RuneTypeTrueU: "𝕦", - RuneTypeTrueE: "𝕖", - - RuneTypeFalseF: "𝔣", // fraktur - RuneTypeFalseA: "𝔞", - RuneTypeFalseL: "𝔩", - RuneTypeFalseS: "𝔰", - RuneTypeFalseE: "𝔢", - - RuneTypeNullN: "ⓝ", // circled - RuneTypeNullU: "ⓤ", - RuneTypeNullL1: "ⓛ", - RuneTypeNullL2: "Ⓛ", // +uppercase - - RuneTypeEOF: "$", - - runeTypeAny: "?", - }[t] - if ok { - return str - } - return fmt.Sprintf("<%d>", t) -} - -func (t RuneType) JSONType() string { - return map[RuneType]string{ - RuneTypeObjectBeg: "object", - RuneTypeArrayBeg: "array", - RuneTypeStringBeg: "string", - RuneTypeNumberIntNeg: "number", - RuneTypeNumberIntZero: "number", - RuneTypeNumberIntDig: "number", - RuneTypeTrueT: "true", - RuneTypeFalseF: "false", - RuneTypeNullN: "null", - RuneTypeEOF: "eof", - }[t] -} - -// IsNumber returns whether the RuneType is one of the -// RuneTypeNumberXXX values. -func (t RuneType) IsNumber() bool { - return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig -} - -// Parser is the low-level JSON parser that powers both *Decoder and -// *ReEncoder. 
-type Parser struct { - // Setting MaxError to a value greater than 0 causes - // HandleRune to return ErrParserExceededMaxDepth if - // objects/arrays become nested more deeply than this. - MaxDepth int - - initialized bool - - err error - closed bool - - // We reuse RuneTypes to store the stack. The base idea is: - // stack items are "the most recently read stack-relevant - // RuneType". - // - // The stack starts out with the special pseudo-RuneType - // `runeTypeAny` that means we're willing to accept any - // element type; an empty stack means that we have reached the - // end of the top-level element and should accept no more - // input except for whitespace. - // - // The "normal" stack-relevant RuneTypes are: - // - // "\uABC for strings - // -01.2e+3 for numbers - // 𝕥𝕣𝕦 for "true" - // 𝔣𝔞𝔩𝔰 for "false" - // ⓝⓤⓛ for "null" - // - // Objects and arrays break the "most recently read RuneType" - // rule; they need some special assignments: - // - // { object: waiting for key to start or '}' - // » object: reading key / waiting for colon - // o object: reading value / waiting for ',' or '}' - // - // [ array: waiting for item to start or ']' - // a array: reading item / waiting for ',' or ']' - // - // Within each element type, the stack item is replaced, not pushed. - // - // (Keep each of these examples in-sync with parse_test.go.) - // - // For example, given the input string - // - // {"x":"y","a":"b"} - // - // The stack would be - // - // stack processed - // ? - // { { - // »" {" - // »" {"x - // » {"x" - // o? {"x": - // o" {"x":" - // o" {"x":"y - // o {"x":"y" - // { {"x":"y", - // »" {"x":"y"," - // »" {"x":"y","a - // » {"x":"y","a" - // o? {"x":"y","a": - // o" {"x":"y","a":" - // o" {"x":"y","a":"b - // o {"x":"y","a":"b" - // {"x":"y","a":"b"} - // - // Or, given the input string - // - // ["x","y"] - // - // The stack would be - // - // stack processed - // ? - // [ [ - // a" [" - // a" ["x - // a ["x" - // a? 
["x", - // a" ["x"," - // a" ["x","y - // a ["x","y" - // ["x","y"] - stack []RuneType - - barriers []barrier -} - -type barrier struct { - closed bool - stack []RuneType -} - -func (par *Parser) init() { - if !par.initialized { - par.initialized = true - par.pushState(runeTypeAny) - } -} - -func (par *Parser) pushState(state RuneType) RuneType { - par.stack = append(par.stack, state) - return state -} - -func (par *Parser) replaceState(state RuneType) RuneType { - par.stack[len(par.stack)-1] = state - return state -} - -func (par *Parser) popState() { - par.stack = par.stack[:len(par.stack)-1] -} - -func (par *Parser) stackString() string { - par.init() - var buf strings.Builder - for _, s := range par.stack { - buf.WriteString(s.String()) - } - return buf.String() -} - -func (par *Parser) depth() int { - n := len(par.stack) - for _, barrier := range par.barriers { - n += len(barrier.stack) - } - return n -} - -func (par *Parser) StackIsEmpty() bool { - if len(par.barriers) > 0 { - return false - } - if len(par.stack) == 0 { - return true - } - return len(par.stack) == 1 && par.stack[0] == runeTypeAny -} - -func (par *Parser) StackSize() int { - return len(par.stack) -} - -// Reset all Parser state. -func (par *Parser) Reset() { - *par = Parser{ - MaxDepth: par.MaxDepth, - } -} - -// PushReadBarrier causes the parser to expect EOF once the end of the -// element that is started by the current top-of-stack is reached, -// until this is un-done with PopBarrier. It essentially turns the -// parser in to a sub-parser. -// -// PushReadBarrier may only be called at the beginning of an element, -// whether that be -// -// - runeTypeAny -// - RuneTypeObjectBeg -// - RuneTypeArrayBeg -// - RuneTypeStringBeg -// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig -// - RuneTypeTrueT -// - RuneTypeFalseF -// - RuneTypeNullN -func (par *Parser) PushReadBarrier() { - // Sanity checking. 
- par.init() - if len(par.stack) == 0 { - panic(errors.New("illegal PushReadBarrier call: empty stack")) - } - curState := par.stack[len(par.stack)-1] - switch curState { - case runeTypeAny, - RuneTypeObjectBeg, - RuneTypeArrayBeg, - RuneTypeStringBeg, - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, - RuneTypeTrueT, - RuneTypeFalseF, - RuneTypeNullN: - // OK - default: - panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) - } - // Actually push. - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack[:len(par.stack)-1], - }) - par.stack = []RuneType{curState} -} - -// PushWriteBarrier causes the parser to expect EOF once the end of -// the about-to-start element is reached, until this is un-done with -// PopBarrier. It essentially turns the parser in to a sub-parser. -// -// PushWriteBarrier may only be called at the places where an element -// of any type may start: -// -// - runeTypeAny for top-level and object-value elements -// - RuneTypeArrayBeg for array-item elements -// -// PushWriteBarrier signals intent to write an element; if it is -// called in a place where an element is optional (at the beginning of -// an array), it becomes a syntax error to not write the element. -func (par *Parser) PushWriteBarrier() { - par.init() - if len(par.stack) == 0 { - panic(errors.New("illegal PushWriteBarrier call: empty stack")) - } - switch par.stack[len(par.stack)-1] { - case runeTypeAny: - par.popState() - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, - }) - par.stack = []RuneType{runeTypeAny} - case RuneTypeArrayBeg: - par.replaceState(RuneTypeArrayComma) - par.barriers = append(par.barriers, barrier{ - closed: par.closed, - stack: par.stack, - }) - par.stack = []RuneType{runeTypeAny} - default: - panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) - } -} - -// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. 
-func (par *Parser) PopBarrier() { - if len(par.barriers) == 0 { - panic(errors.New("illegal PopBarrier call: empty barrier stack")) - } - barrier := par.barriers[len(par.barriers)-1] - par.barriers = par.barriers[:len(par.barriers)-1] - par.closed = barrier.closed - par.stack = append(barrier.stack, par.stack...) -} - -// HandleEOF feeds EOF to the Parser. The returned RuneType is either -// RuneTypeEOF or RuneTypeError. -// -// An error is returned if and only if the RuneType is RuneTypeError. -// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. -// -// RuneTypeEOF indicates that a complete JSON document has been read. -func (par *Parser) HandleEOF() (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - defer func() { - par.closed = true - }() - if par.err != nil { - return RuneTypeError, par.err - } - par.init() - switch len(par.stack) { - case 0: - return RuneTypeEOF, nil - case 1: - switch { - case par.stack[0].IsNumber(): - if _, err := par.HandleRune('\n'); err == nil { - return RuneTypeEOF, nil - } - case par.stack[0] == runeTypeAny: - par.err = io.EOF - return RuneTypeError, par.err - } - fallthrough - default: - par.err = io.ErrUnexpectedEOF - return RuneTypeError, par.err - } -} - -// HandleRune feeds a Unicode rune to the Parser. -// -// An error is returned if and only if the RuneType is RuneTypeError. -// Returns io/fs.ErrClosed if .HandleEOF() has previously been called -// (and .Reset() has not been called since). -// -// Once RuneTypeError or RuneTypeEOF has been returned, it will keep -// being returned from both .HandleRune(c) and .HandleEOF() until -// .Reset() is called. 
-// -// RuneTypeEOF indicates that the rune cannot be appended to the JSON -// document; a new JSON document must be started in order to process -// that rune. -func (par *Parser) HandleRune(c rune) (RuneType, error) { - if par.closed { - return RuneTypeError, iofs.ErrClosed - } - if par.err != nil { - return RuneTypeError, par.err - } - par.init() - if len(par.stack) == 0 { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - return RuneTypeEOF, nil - } - } - switch par.stack[len(par.stack)-1] { - // any ///////////////////////////////////////////////////////////////////////////////////// - case runeTypeAny: - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '{': - if par.MaxDepth > 0 && par.depth() > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeObjectBeg), nil - case '[': - if par.MaxDepth > 0 && par.depth() > par.MaxDepth { - return RuneTypeError, ErrParserExceededMaxDepth - } - return par.replaceState(RuneTypeArrayBeg), nil - case '"': - return par.replaceState(RuneTypeStringBeg), nil - case '-': - return par.replaceState(RuneTypeNumberIntNeg), nil - case '0': - return par.replaceState(RuneTypeNumberIntZero), nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case 't': - return par.replaceState(RuneTypeTrueT), nil - case 'f': - return par.replaceState(RuneTypeFalseF), nil - case 'n': - return par.replaceState(RuneTypeNullN), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c) - } - // object ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeObjectBeg: // waiting for key to start or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case '"': - par.replaceState(RuneTypeStringEnd) - return par.pushState(RuneTypeStringBeg), nil - case 
'}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) - } - case RuneTypeStringEnd: // waiting for ':' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ':': - par.replaceState(RuneTypeObjectComma) - par.pushState(runeTypeAny) - return RuneTypeObjectColon, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) - } - case RuneTypeObjectComma: // waiting for ',' or '}' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.replaceState(RuneTypeObjectBeg) - return RuneTypeObjectComma, nil - case '}': - par.popState() - return RuneTypeObjectEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c) - } - // array /////////////////////////////////////////////////////////////////////////////////// - case RuneTypeArrayBeg: // waiting for item to start or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - par.replaceState(RuneTypeArrayComma) - par.pushState(runeTypeAny) - return par.HandleRune(c) - } - case RuneTypeArrayComma: // waiting for ',' or ']' - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - case ',': - par.pushState(runeTypeAny) - return RuneTypeArrayComma, nil - case ']': - par.popState() - return RuneTypeArrayEnd, nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q after array element", c) - } - // string ////////////////////////////////////////////////////////////////////////////////// - case RuneTypeStringBeg: // waiting for char or '"' - switch { - case c == '\\': - return par.replaceState(RuneTypeStringEsc), nil - case c == '"': - par.popState() - return RuneTypeStringEnd, nil - case 0x0020 <= c && c <= 0x10FFFF: - return RuneTypeStringChar, nil - 
default: - return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) - } - case RuneTypeStringEsc: // waiting for escape char - switch c { - case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEsc1, nil - case 'u': - return par.replaceState(RuneTypeStringEscU), nil - default: - return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) - } - case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { - return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) - } - // number ////////////////////////////////////////////////////////////////////////////////// - // - // Here's a flattened drawing of the syntax diagram from www.json.org : - // - // [------------ integer ----------][-- fraction ---][-------- exponent -------] - // >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─> - // │ │ │ │ │ │ │ │ │ - // ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯ ╰─"."─╭digit╮─╯ ╰─"e"─╭─╮─────╭─╭digit╮─╯ - // ╰──<──╯ ╰──<──╯ │ │ │ │ ╰──<──╯ - // ╰─"E"─╯ ╰─"-"─╯ - // │ │ - // ╰─"+"─╯ - // - // Now here it is slightly redrawn, and with each distinct state our - // parser can be in marked with a single-capital-letter: - 
// - // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] - // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─> - // │ │ │ │ │ │ │ │ - // ╰─"-"─B─╯ ╰─digit 1-9─╭─D─╯─digit╮ ╰─"."─E─digit──╭─F─╯─digit╮ ╰─"e"─╭─G─╮─────╭─╭digit─I─╯ - // ╰────<─────╯ ╰────<─────╯ │ │ │ H ╰────<───╯ - // ╰─"E"─╯ ╰─"-"─╯ - // │ │ - // ╰─"+"─╯ - // - // You may notice that each of these states may be uniquely identified - // by the last-read RuneType: - // - // A = (nothing yet) - // B = IntNeg - // C = IntZero - // D = IntDig - // E = FracDot - // F = FracDig - // G = ExpE - // H = ExpSign - // I = ExpDig - // - // The 'A' state is part of the runeTypeAny case above, and - // the remainder follow: - case RuneTypeNumberIntNeg: // B - switch c { - case '0': - return par.replaceState(RuneTypeNumberIntZero), nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberIntZero: // C - switch c { - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberIntDig: // D - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberIntDig), nil - case '.': - return par.replaceState(RuneTypeNumberFracDot), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberFracDot: // E - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberFracDig: // F - switch c { - case 
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberFracDig), nil - case 'e', 'E': - return par.replaceState(RuneTypeNumberExpE), nil - default: - par.popState() - return par.HandleRune(c) - } - case RuneTypeNumberExpE: // G - switch c { - case '-', '+': - return par.replaceState(RuneTypeNumberExpSign), nil - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpSign: // H - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c) - } - case RuneTypeNumberExpDig: // I - switch c { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return par.replaceState(RuneTypeNumberExpDig), nil - default: - par.popState() - return par.HandleRune(c) - } - // literals //////////////////////////////////////////////////////////////////////////////// - // true - case RuneTypeTrueT: - return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) - case RuneTypeTrueR: - return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) - case RuneTypeTrueU: - return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) - // false - case RuneTypeFalseF: - return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) - case RuneTypeFalseA: - return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) - case RuneTypeFalseL: - return par.expectRune(c, 's', RuneTypeFalseS, "false", false) - case RuneTypeFalseS: - return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) - // null - case RuneTypeNullN: - return par.expectRune(c, 'u', RuneTypeNullU, "null", false) - case RuneTypeNullU: - return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) - case RuneTypeNullL1: - return par.expectRune(c, 'l', RuneTypeNullL2, "null", 
true) - default: - panic(fmt.Errorf(`invalid stack: "%s"`, par.stackString())) - } -} - -func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { - if c != exp { - return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp) - } - if pop { - par.popState() - return typ, nil - } else { - return par.replaceState(typ), nil - } -} diff --git a/internal/parse_test.go b/internal/parse_test.go deleted file mode 100644 index 34977fb..0000000 --- a/internal/parse_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (C) 2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestParserHandleRune(t *testing.T) { - t.Parallel() - type testcase struct { - Input string - ExpStack []string - } - testcases := map[string]testcase{ - // Keep these test-cases in-sync with the examples in parse.go. - "object": { - Input: `{"x":"y","a":"b"}`, - ExpStack: []string{ - // st,// processed - `?`, - `{`, // { - `»"`, // {" - `»"`, // {"x - `»`, // {"x" - `o?`, // {"x": - `o"`, // {"x":" - `o"`, // {"x":"y - `o`, // {"x":"y" - `{`, // {"x":"y", - `»"`, // {"x":"y"," - `»"`, // {"x":"y","a - `»`, // {"x":"y","a" - `o?`, // {"x":"y","a": - `o"`, // {"x":"y","a":" - `o"`, // {"x":"y","a":"b - `o`, // {"x":"y","a":"b" - ``, // {"x":"y","a":"b"} - }, - }, - "array": { - Input: `["x","y"]`, - ExpStack: []string{ - // st,// processed - `?`, - `[`, // [ - `a"`, // [" - `a"`, // ["x - `a`, // ["x" - `a?`, // ["x", - `a"`, // ["x"," - `a"`, // ["x","y - `a`, // ["x","y" - ``, // ["x","y"] - }, - }, - } - for tcName, tc := range testcases { - tc := tc - t.Run(tcName, func(t *testing.T) { - t.Parallel() - var par Parser - if !assert.Equal(t, len(tc.Input)+1, len(tc.ExpStack)) { - return - } - for i, r := range tc.Input { - assert.Equal(t, tc.ExpStack[i], par.stackString()) - _, err := par.HandleRune(r) - 
assert.NoError(t, err) - assert.Equal(t, tc.ExpStack[i+1], par.stackString()) - } - }) - } -} diff --git a/internal/tags.go b/internal/tags.go deleted file mode 100644 index bdf1f72..0000000 --- a/internal/tags.go +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -var ParseTag = parseTag diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 b/internal/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 deleted file mode 100644 index c3774e7..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/06e2c9db80a08b67fad7f1a4606dc7419750995a57828aa25ea57fe7099d5c03 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("0000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a b/internal/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a deleted file mode 100644 index 4c861db..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/24f53a36f8832fec65cac0aa0f3b43ec1c904414fa6d38f6fc288b0bbd69588a +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 b/internal/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 deleted file mode 100644 index 3d32e14..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/2d49311ef22319f70a3590a86b406b9f2565987a4a3b6d7660ddc308b5b2fae2 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd 
b/internal/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd deleted file mode 100644 index d08ef92..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/356e28f5914a0f16f3cef81330f1d92060be4d694a93dedd654bf48743a7d2bd +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 b/internal/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 deleted file mode 100644 index a96f559..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/582528ddfad69eb57775199a43e0f9fd5c94bba343ce7bb6724d4ebafe311ed4 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("0") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 b/internal/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 deleted file mode 100644 index 87c024d..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/60c81ee499a7f1e151b66b08f0a4ff81edd7cb53d00dce8ee0eaf31683996026 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("0000000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d b/internal/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d deleted file mode 100644 index 959401e..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/66498f377f38b53eebe1ceaa4a53e4de01a04efc02ac9cfda60f9815f80e9b9d +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b b/internal/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b deleted file 
mode 100644 index bd1ae59..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/731951fe84fa6f3a7f6ee8adaa585d4f6a01f359a04737e51ffc70f16f480b9b +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c b/internal/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c deleted file mode 100644 index 09e0ad2..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/7d6367ba84cd18550920b5202cd1269174416ce32769c7f59376e76b7dd3129c +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c b/internal/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c deleted file mode 100644 index e8000f3..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/8727b16d337d7b8187433233f3a90099024e580a6ba319ea2bf539880c50bd7c +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c b/internal/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c deleted file mode 100644 index aac6b7d..0000000 --- 
a/internal/testdata/fuzz/FuzzBase64Decoder/9201a772731543760326638b8915f80863feab0ba0108183b3093934bdc0420c +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f b/internal/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f deleted file mode 100644 index f3bf6d9..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/92f75f690317ace34aeaae3fe39f5f2ff9830777253ff371c5ef6f403a0f8f0f +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 b/internal/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 deleted file mode 100644 index 2e7f462..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/93d6f7bc0d93f998c7b7fe654ff46010d6fa76f0a142c3523c42454f8ad10b07 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a b/internal/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a deleted file mode 100644 index c541f52..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/a7450fd77fc7c53cc5bd136874415dddfff5c586e662f21420caa7a94131a56a +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c b/internal/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c deleted file mode 100644 index 5d56f29..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/a95d2a0f87501a643d54218d2ad8112204672cc1fb30be297853616788208a5c +++ /dev/null @@ -1,2 
+0,0 @@ -go test fuzz v1 -[]byte("0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 b/internal/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 deleted file mode 100644 index 4b4d59f..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/beed435aa2fee4819eab217543561dfd8001d4a44f53ceb664aaba86cebfaf21 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 b/internal/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 deleted file mode 100644 index ef9f9d4..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/c2501043394e49f2477408be5ef9389790e33ed1886073dec445d4cf05bcd4b4 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 b/internal/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 deleted file mode 100644 index 67322c7..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/caf81e9797b19c76c1fc4dbf537d4d81f389524539f402d13aa01f93a65ac7e9 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 
b/internal/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 deleted file mode 100644 index f195330..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/cc90a4a40ae9b3beac70baf6d7821a5a6f3a90cabb033575790be91723593680 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\x04000000000000\r00000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a b/internal/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a deleted file mode 100644 index 5b0d392..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/ec72f669d648d8d9b9f75a3b303897c59b11e4bfb7622f25ff251a92f182bc2a +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("0000000000000000000000000000000000000000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 b/internal/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 deleted file mode 100644 index a389d3c..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/f34630c44c11bb13d27531927c5c1e65d159b70f39cd161da0dba348c1221ab3 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("00000") diff --git a/internal/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 b/internal/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 deleted file mode 100644 index 17d10b2..0000000 --- a/internal/testdata/fuzz/FuzzBase64Decoder/fd67efb09d433a1351a201281dbf6568628b4135c35c811dd9bce97620a75d43 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("000000000000") diff --git a/reencode.go b/reencode.go index 3e9cf37..232d91d 100644 --- 
a/reencode.go +++ b/reencode.go @@ -10,7 +10,8 @@ import ( "io" "unicode/utf8" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -71,7 +72,7 @@ type ReEncoderConfig struct { func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, - out: internal.NewAllWriter(out), + out: fastio.NewAllWriter(out), specu: new(speculation), } } @@ -86,7 +87,7 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { ReEncoderConfig - out internal.AllWriter + out fastio.AllWriter // state: .Write's and .WriteString's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -94,13 +95,13 @@ type ReEncoder struct { // state: .WriteRune err error - par internal.Parser + par jsonparse.Parser written int inputPos int64 // state: .handleRune - lastNonSpace internal.RuneType - lastNonSpaceNonEOF internal.RuneType + lastNonSpace jsonparse.RuneType + lastNonSpaceNonEOF jsonparse.RuneType wasNumber bool curIndent int uhex [4]byte // "\uABCD"-encoded characters in strings @@ -135,15 +136,15 @@ func (specu *speculation) Reset() { type inputTuple struct { c rune - t internal.RuneType + t jsonparse.RuneType stackSize int } // public API ////////////////////////////////////////////////////////////////// var ( - _ internal.AllWriter = (*ReEncoder)(nil) - _ io.Closer = (*ReEncoder)(nil) + _ fastio.AllWriter = (*ReEncoder)(nil) + _ io.Closer = (*ReEncoder)(nil) ) // Write implements io.Writer; it does what you'd expect. @@ -208,7 +209,7 @@ func (enc *ReEncoder) WriteString(p string) (int, error) { // WriteByte implements io.ByteWriter; it does what you'd expect. 
func (enc *ReEncoder) WriteByte(b byte) error { - return internal.WriteByte(enc, b) + return fastio.WriteByte(enc, b) } // Close implements io.Closer; it does what you'd expect, mostly. @@ -230,7 +231,7 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRune(0, internal.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRune(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, @@ -274,7 +275,7 @@ rehandle: return enc.written, enc.err } enc.err = enc.handleRune(c, t, enc.stackSize()) - if enc.err == nil && t == internal.RuneTypeEOF { + if enc.err == nil && t == jsonparse.RuneTypeEOF { if enc.AllowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle @@ -319,7 +320,7 @@ func (enc *ReEncoder) stackSize() int { return sz } -func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { +func (enc *ReEncoder) handleRune(c rune, t jsonparse.RuneType, stackSize int) error { if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } @@ -327,7 +328,7 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err // main if !enc.specu.speculating { // not speculating switch t { - case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { return err } @@ -385,7 +386,7 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) err return nil } -func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error { +func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t jsonparse.RuneType) error { err, shouldHandle := enc.handleRunePre(c, t) if err != nil { return err @@ -398,9 +399,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, 
t internal.RuneType) error // handleRunePre handles buffered things that need to happen before // the new rune itself is handled. -func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { +func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) { // emit newlines between top-level values - if enc.lastNonSpace == internal.RuneTypeEOF { + if enc.lastNonSpace == jsonparse.RuneTypeEOF { switch { case enc.wasNumber && t.IsNumber(): if err := enc.emitByte('\n'); err != nil { @@ -415,10 +416,10 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits - case internal.RuneTypeNumberFracDot: + case jsonparse.RuneTypeNumberFracDot: enc.fracZeros = 0 - case internal.RuneTypeNumberFracDig: - if c == '0' && enc.lastNonSpace == internal.RuneTypeNumberFracDig { + case jsonparse.RuneTypeNumberFracDig: + if c == '0' && enc.lastNonSpace == jsonparse.RuneTypeNumberFracDig { enc.fracZeros++ return nil, false } @@ -432,9 +433,9 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits - case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign: + case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign: enc.expZero = true - case internal.RuneTypeNumberExpDig: + case jsonparse.RuneTypeNumberExpDig: if c == '0' && enc.expZero { return nil, false } @@ -451,18 +452,18 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // whitespace switch { case enc.Compact: - if t == internal.RuneTypeSpace { + if t == jsonparse.RuneTypeSpace { return nil, false } case enc.Indent != "": switch t { - case internal.RuneTypeSpace: + case jsonparse.RuneTypeSpace: // let us manage whitespace, don't pass it through return nil, false - case internal.RuneTypeObjectEnd, 
internal.RuneTypeArrayEnd: + case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd: enc.curIndent-- switch enc.lastNonSpace { - case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // collapse default: if err := enc.emitNlIndent(); err != nil { @@ -471,17 +472,17 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } default: switch enc.lastNonSpace { - case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma: + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err, false } - case internal.RuneTypeObjectColon: + case jsonparse.RuneTypeObjectColon: if err := enc.emitByte(' '); err != nil { return err, false } } switch t { - case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: enc.curIndent++ } } @@ -491,15 +492,15 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } // handleRuneMain handles the new rune itself, not buffered things. 
-func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { +func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { var err error switch t { - case internal.RuneTypeStringChar: + case jsonparse.RuneTypeStringChar: err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) - case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: + case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: // do nothing - case internal.RuneTypeStringEsc1: + case jsonparse.RuneTypeStringEsc1: switch c { case '"': err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) @@ -520,14 +521,14 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { default: panic("should not happen") } - case internal.RuneTypeStringEscUA: - enc.uhex[0], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUB: - enc.uhex[1], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUC: - enc.uhex[2], _ = internal.HexToInt(c) - case internal.RuneTypeStringEscUD: - enc.uhex[3], _ = internal.HexToInt(c) + case jsonparse.RuneTypeStringEscUA: + enc.uhex[0], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUB: + enc.uhex[1], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUC: + enc.uhex[2], _ = jsonparse.HexToInt(c) + case jsonparse.RuneTypeStringEscUD: + enc.uhex[3], _ = jsonparse.HexToInt(c) c := 0 | rune(enc.uhex[0])<<12 | rune(enc.uhex[1])<<8 | @@ -535,24 +536,24 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { rune(enc.uhex[3])<<0 err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) - case internal.RuneTypeError: // EOF explicitly stated by .Close() + case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() fallthrough - case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value + case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level 
value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { case enc.ForceTrailingNewlines && len(enc.barriers) == 0: - t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) + t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) err = enc.emitByte('\n') default: - t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed + t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed } default: err = enc.emitByte(byte(c)) } - if t != internal.RuneTypeSpace { + if t != jsonparse.RuneTypeSpace { enc.lastNonSpace = t - if t != internal.RuneTypeEOF { + if t != jsonparse.RuneTypeEOF { enc.lastNonSpaceNonEOF = t } } diff --git a/struct.go b/struct.go index 81bc22d..5ccb62f 100644 --- a/struct.go +++ b/struct.go @@ -9,7 +9,7 @@ import ( "git.lukeshu.com/go/typedsync" - "git.lukeshu.com/go/lowmemjson/internal" + "git.lukeshu.com/go/lowmemjson/internal/jsontags" ) type structField struct { @@ -157,7 +157,7 @@ func indexStructInner(typ reflect.Type, byPos *[]structField, byName map[string] if tag == "-" { continue } - tagName, opts := internal.ParseTag(tag) + tagName, opts := jsontags.ParseTag(tag) name := tagName if !isValidTag(name) { name = "" -- cgit v1.2.3-2-g168b