diff options
Diffstat (limited to 'internal')
-rw-r--r-- | internal/base64.go | 121 | ||||
-rw-r--r-- | internal/base64_test.go | 44 | ||||
-rw-r--r-- | internal/hex.go | 2 | ||||
-rw-r--r-- | internal/parse.go | 82 |
4 files changed, 215 insertions, 34 deletions
// base64Decoder is a streaming decoder for standard-alphabet base64.
// Encoded text is handed to Write in arbitrarily sized pieces; decoded
// bytes are forwarded to dst as soon as a full 4-symbol tuple is
// available.  Close flushes a trailing partial tuple.
type base64Decoder struct {
	// dst receives the decoded bytes.
	dst io.Writer

	// err is sticky: once set, Write and Close keep returning it.
	err error
	// pos counts the alphabet symbols decoded so far; it is the
	// offset reported in base64.CorruptInputError.
	pos int64
	// buf holds a partial tuple carried over between Write calls;
	// only the first bufLen bytes are meaningful.
	buf    [4]byte
	bufLen int
}

// NewBase64Decoder returns an io.WriteCloser that decodes the base64
// text written to it and writes the decoded bytes to w.
//
// Close must be called after the last Write so that a final tuple
// that was not padded out to 4 symbols with '=' still gets decoded.
func NewBase64Decoder(w io.Writer) io.WriteCloser {
	return &base64Decoder{
		dst: w,
	}
}

// decodeByte maps one base64 symbol to its 6-bit value.  It returns
// false (and leaves dec.pos untouched) if b is not in the alphabet;
// note that the padding character '=' is not in the alphabet.
func (dec *base64Decoder) decodeByte(b byte) (byte, bool) {
	const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	n := strings.IndexByte(alphabet, b)
	if n < 0 {
		return 0, false
	}
	dec.pos++
	return byte(n), true
}

// decodeTuple decodes one 4-symbol tuple and writes the resulting 0-3
// bytes to dec.dst.
//
// '=' is accepted as trailing padding only.  It returns
// base64.CorruptInputError if a symbol is outside the alphabet, if
// data follows padding, or if the tuple holds exactly one symbol (6
// bits cannot encode a byte).  Errors from dec.dst.Write are returned
// as-is.
func (dec *base64Decoder) decodeTuple(a, b, c, d byte) error {
	var (
		val      uint32
		nSymbols int
		padded   bool
	)
	for _, ch := range [4]byte{a, b, c, d} {
		// Shift unconditionally so that padding occupies its
		// 6 bits and the data bits end up in the same place as
		// in a full tuple.
		val <<= 6
		if ch == '=' {
			padded = true
			continue
		}
		if padded {
			// Data after padding.
			return base64.CorruptInputError(dec.pos)
		}
		sextet, ok := dec.decodeByte(ch)
		if !ok {
			return base64.CorruptInputError(dec.pos)
		}
		val |= uint32(sextet)
		nSymbols++
	}

	switch nSymbols {
	case 0:
		// All padding; nothing to emit.
		return nil
	case 1:
		return base64.CorruptInputError(dec.pos)
	}

	// 2 symbols -> 1 byte, 3 symbols -> 2 bytes, 4 symbols -> 3 bytes.
	decoded := [3]byte{byte(val >> 16), byte(val >> 8), byte(val)}
	_, err := dec.dst.Write(decoded[:nSymbols-1])
	return err
}

// Write implements io.Writer.  It decodes every complete tuple in dat
// (together with any bytes left over from the previous call) and
// stashes the remaining 0-3 bytes for the next Write or Close.
//
// Once a call has failed, every later call returns that same error.
func (dec *base64Decoder) Write(dat []byte) (int, error) {
	if len(dat) == 0 {
		return 0, nil
	}
	if dec.err != nil {
		return 0, dec.err
	}
	var n int
	if dec.bufLen > 0 {
		// Top up the carried-over partial tuple first.
		n = copy(dec.buf[dec.bufLen:], dat)
		dec.bufLen += n
		if dec.bufLen < 4 {
			return len(dat), nil
		}
		// The tuple is consumed from here on; clear the count
		// so that neither an error below nor a later Close can
		// decode it a second time.
		dec.bufLen = 0
		if err := dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]); err != nil {
			dec.err = err
			return 0, dec.err
		}
	}
	for ; n+3 < len(dat); n += 4 {
		if err := dec.decodeTuple(dat[n], dat[n+1], dat[n+2], dat[n+3]); err != nil {
			dec.err = err
			return n, dec.err
		}
	}
	dec.bufLen = copy(dec.buf[:], dat[n:])
	return len(dat), nil
}

// Close implements io.Closer.  It decodes a trailing partial tuple, if
// any, by padding it with '='.  It does not close the underlying
// writer.
//
// If an earlier Write failed, Close returns that error without
// writing anything.
func (dec *base64Decoder) Close() error {
	if dec.err != nil {
		return dec.err
	}
	if dec.bufLen == 0 {
		return nil
	}
	// Pad *after* the buffered symbols; padding from index 0 would
	// overwrite them and silently drop the tail of the input.
	copy(dec.buf[dec.bufLen:], "====")
	dec.bufLen = 0
	dec.err = dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3])
	return dec.err
}
= "0123456789abcdef" -func HexToInt[T interface{ byte | rune }](c T) (byte, bool) { +func HexToInt(c rune) (byte, bool) { switch { case '0' <= c && c <= '9': return byte(c) - '0', true diff --git a/internal/parse.go b/internal/parse.go index 895c930..bb849e7 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -70,9 +70,14 @@ const ( RuneTypeNullL2 RuneTypeEOF + + // Not a real rune type, but used as a stack state. + runeTypeAny ) // GoString implements fmt.GoStringer. +// +//nolint:dupl // False positive due to similarly shaped AST. func (t RuneType) GoString() string { str, ok := map[RuneType]string{ RuneTypeError: "RuneTypeError", @@ -125,6 +130,8 @@ func (t RuneType) GoString() string { RuneTypeNullL2: "RuneTypeNullL2", RuneTypeEOF: "RuneTypeEOF", + + runeTypeAny: "runeTypeAny", }[t] if ok { return str @@ -133,6 +140,8 @@ func (t RuneType) GoString() string { } // String implements fmt.Stringer. +// +//nolint:dupl // False positive due to similarly shaped AST. func (t RuneType) String() string { str, ok := map[RuneType]string{ RuneTypeError: "x", @@ -148,7 +157,7 @@ func (t RuneType) String() string { RuneTypeArrayComma: "a", RuneTypeArrayEnd: "]", - RuneTypeStringBeg: "“", + RuneTypeStringBeg: "\"", RuneTypeStringChar: "c", RuneTypeStringEsc: "\\", RuneTypeStringEsc1: "b", @@ -157,7 +166,7 @@ func (t RuneType) String() string { RuneTypeStringEscUB: "B", RuneTypeStringEscUC: "C", RuneTypeStringEscUD: "D", - RuneTypeStringEnd: "”", + RuneTypeStringEnd: "»", RuneTypeNumberIntNeg: "-", RuneTypeNumberIntZero: "0", @@ -185,6 +194,8 @@ func (t RuneType) String() string { RuneTypeNullL2: "Ⓛ", // +uppercase RuneTypeEOF: "$", + + runeTypeAny: "?", }[t] if ok { return str @@ -226,15 +237,19 @@ type Parser struct { err error closed bool - // We reuse RuneTypes to store the stack. The base idea is - // that, stack items are "the most recently read - // stack-relevant RuneType". + // We reuse RuneTypes to store the stack. 
The base idea is: + // stack items are "the most recently read stack-relevant + // RuneType". // - // We treat RuneTypeError as a wildcard. + // The stack starts out with the special pseudo-RuneType + // `runeTypeAny` that means we're willing to accept any + // element type; an empty stack means that we have reached the + // end of the top-level element and should accept no more + // input except for whitespace. // - // The "normal"stack-relevant RuneTypes are: + // The "normal" stack-relevant RuneTypes are: // - // “\uABC for strings + // "\uABC for strings // -01.2e+3 for numbers // 𝕥𝕣𝕦 for "true" // 𝔣𝔞𝔩𝔰 for "false" @@ -244,8 +259,7 @@ type Parser struct { // rule; they need some special assignments: // // { object: waiting for key to start or '}' - // ” object: reading key / waiting for colon - // : object: waiting for value to start + // » object: reading key / waiting for colon // o object: reading value / waiting for ',' or '}' // // [ array: waiting for item to start or ']' @@ -261,22 +275,22 @@ type Parser struct { // The stack would be // // stack processed - // x + // ? // { { - // ”“ {" - // ”“ {"x - // ” {"x" - // : {"x": - // o“ {"x":" - // o“ {"x":"y + // »" {" + // »" {"x + // » {"x" + // o? {"x": + // o" {"x":" + // o" {"x":"y // o {"x":"y" // { {"x":"y", - // ”“ {"x":"y"," - // ”“ {"x":"y","a - // ” {"x":"y","a" - // : {"x":"y","a": - // o“ {"x":"y","a":" - // o“ {"x":"y","a":"b + // »" {"x":"y"," + // »" {"x":"y","a + // » {"x":"y","a" + // o? 
{"x":"y","a": + // o" {"x":"y","a":" + // o" {"x":"y","a":"b // o {"x":"y","a":"b" // {"x":"y","a":"b"} stack []RuneType @@ -286,10 +300,12 @@ func (par *Parser) pushState(state RuneType) RuneType { par.stack = append(par.stack, state) return state } + func (par *Parser) replaceState(state RuneType) RuneType { par.stack[len(par.stack)-1] = state return state } + func (par *Parser) popState() { par.stack = par.stack[:len(par.stack)-1] } @@ -303,7 +319,7 @@ func (par *Parser) stackString() string { } func (par *Parser) StackIsEmpty() bool { - return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == RuneTypeError) + return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny) } // Reset all Parser state. @@ -337,7 +353,7 @@ func (par *Parser) HandleEOF() (RuneType, error) { } if !par.initialized { par.initialized = true - par.pushState(RuneTypeError) + par.pushState(runeTypeAny) } switch len(par.stack) { case 0: @@ -348,7 +364,7 @@ func (par *Parser) HandleEOF() (RuneType, error) { if _, err := par.HandleRune('\n'); err == nil { return RuneTypeEOF, nil } - case par.stack[0] == RuneTypeError: + case par.stack[0] == runeTypeAny: par.err = io.EOF return RuneTypeError, par.err } @@ -381,7 +397,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { } if !par.initialized { par.initialized = true - par.pushState(RuneTypeError) + par.pushState(runeTypeAny) } if len(par.stack) == 0 { switch c { @@ -393,7 +409,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { } switch par.stack[len(par.stack)-1] { // any ///////////////////////////////////////////////////////////////////////////////////// - case RuneTypeError: + case runeTypeAny: switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil @@ -444,7 +460,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeSpace, nil case ':': par.replaceState(RuneTypeObjectComma) - par.pushState(RuneTypeError) + par.pushState(runeTypeAny) return 
RuneTypeObjectColon, nil default: return RuneTypeError, fmt.Errorf("invalid character %q after object key", c) @@ -472,7 +488,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeArrayEnd, nil default: par.replaceState(RuneTypeArrayComma) - par.pushState(RuneTypeError) + par.pushState(runeTypeAny) return par.HandleRune(c) } case RuneTypeArrayEnd: // waiting for item @@ -481,7 +497,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeSpace, nil default: par.replaceState(RuneTypeArrayComma) - par.pushState(RuneTypeError) + par.pushState(runeTypeAny) return par.HandleRune(c) } case RuneTypeArrayComma: // waiting for ',' or ']' @@ -583,8 +599,8 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { // H = ExpSign // I = ExpDig // - // The 'A' state is part of the RuneTypeError "any" case - // above, and the remainder follow: + // The 'A' state is part of the runeTypeAny case above, and + // the remainder follow: case RuneTypeNumberIntNeg: // B switch c { case '0': |