summaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r--internal/base64.go121
-rw-r--r--internal/base64_test.go44
-rw-r--r--internal/hex.go2
-rw-r--r--internal/parse.go82
4 files changed, 215 insertions, 34 deletions
diff --git a/internal/base64.go b/internal/base64.go
new file mode 100644
index 0000000..15adbf4
--- /dev/null
+++ b/internal/base64.go
@@ -0,0 +1,121 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package internal
+
+import (
+ "encoding/base64"
+ "io"
+ "strings"
+)
+
// base64Decoder is a streaming decoder for base64 text that uses the
// standard alphabet (A-Z, a-z, 0-9, '+', '/') with optional '='
// padding.
//
// Encoded text is fed in through Write in chunks of any size; each
// complete 4-character tuple is decoded and forwarded to dst as soon
// as it is available.  Close flushes a trailing partial tuple.
type base64Decoder struct {
	dst io.Writer

	err    error   // sticky: once set, later non-empty Writes and Close return it
	pos    int64   // number of alphabet characters decoded so far ('=' is not counted)
	buf    [4]byte // encoded characters of a tuple that is not complete yet
	bufLen int     // number of valid bytes at the front of buf
}

// NewBase64Decoder returns an io.WriteCloser that decodes the base64
// text written to it and writes the decoded bytes to w.
//
// Close must be called once all input has been written so that a
// trailing, unpadded tuple is decoded; Close does not close w.
func NewBase64Decoder(w io.Writer) io.WriteCloser {
	return &base64Decoder{
		dst: w,
	}
}

// decodeByte maps one character of the base64 alphabet to its 6-bit
// value.  It reports false (and leaves dec.pos untouched) if b is not
// in the alphabet; on success dec.pos is advanced by one.
func (dec *base64Decoder) decodeByte(b byte) (byte, bool) {
	const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	n := strings.IndexByte(alphabet, b)
	if n < 0 {
		return 0, false
	}
	dec.pos++
	return byte(n), true
}

// decodeTuple decodes one 4-character tuple and writes the resulting
// 0-3 bytes to dec.dst.
//
// '=' is accepted as padding, but only at the tail of the tuple.  A
// tuple holding k data characters yields k-1 bytes, because each
// output byte needs bits from two neighbouring characters; a tuple
// with exactly one data character therefore cannot be decoded and is
// rejected.  Malformed input is reported as a
// base64.CorruptInputError carrying dec.pos; errors from dec.dst are
// returned as-is.
func (dec *base64Decoder) decodeTuple(a, b, c, d byte) error {
	var sextets [4]byte
	numData := 0
	sawPadding := false
	for i, ch := range [4]byte{a, b, c, d} {
		if ch == '=' {
			sawPadding = true
			continue
		}
		if sawPadding {
			// Data after padding inside a tuple: the bits would
			// land in the wrong place, so refuse to guess.
			return base64.CorruptInputError(dec.pos)
		}
		val, ok := dec.decodeByte(ch)
		if !ok {
			return base64.CorruptInputError(dec.pos)
		}
		sextets[i] = val
		numData++
	}
	if numData == 1 {
		// 6 bits are not enough for even a single byte.
		return base64.CorruptInputError(dec.pos)
	}

	val := uint32(sextets[0])<<18 |
		uint32(sextets[1])<<12 |
		uint32(sextets[2])<<6 |
		uint32(sextets[3])
	decoded := [3]byte{
		byte(val >> 16),
		byte(val >> 8),
		byte(val),
	}
	decodedLen := 0
	if numData > 0 {
		decodedLen = numData - 1
	}

	_, err := dec.dst.Write(decoded[:decodedLen])
	return err
}

// Write implements io.Writer.  It decodes every complete tuple formed
// by previously buffered input plus dat, and buffers the 0-3 leftover
// characters for the next Write or Close.
//
// The first error (corrupt input or a failed write to the
// destination) is remembered and returned by every later non-empty
// Write.
func (dec *base64Decoder) Write(dat []byte) (int, error) {
	if len(dat) == 0 {
		return 0, nil
	}
	if dec.err != nil {
		return 0, dec.err
	}
	var n int
	if dec.bufLen > 0 {
		// Top up the partial tuple left over from the last Write.
		n = copy(dec.buf[dec.bufLen:], dat)
		dec.bufLen += n
		if dec.bufLen < 4 {
			return len(dat), nil
		}
		if err := dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3]); err != nil {
			dec.err = err
			return 0, dec.err
		}
		dec.bufLen = 0
	}
	for ; n+3 < len(dat); n += 4 {
		if err := dec.decodeTuple(dat[n], dat[n+1], dat[n+2], dat[n+3]); err != nil {
			dec.err = err
			return n, dec.err
		}
	}
	dec.bufLen = copy(dec.buf[:], dat[n:])
	return len(dat), nil
}

// Close implements io.Closer.  It pads any buffered partial tuple
// with '=' and decodes it, so that unpadded input is fully flushed.
// If an earlier Write failed, Close returns that same error without
// touching the destination.  Calling Close again after a successful
// Close is a no-op.
func (dec *base64Decoder) Close() error {
	if dec.err != nil {
		return dec.err
	}
	if dec.bufLen == 0 {
		return nil
	}
	// Pad only the unused tail; the buffered characters must survive.
	copy(dec.buf[dec.bufLen:], "====")
	dec.bufLen = 0
	dec.err = dec.decodeTuple(dec.buf[0], dec.buf[1], dec.buf[2], dec.buf[3])
	return dec.err
}
diff --git a/internal/base64_test.go b/internal/base64_test.go
new file mode 100644
index 0000000..f18bcd7
--- /dev/null
+++ b/internal/base64_test.go
@@ -0,0 +1,44 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package internal
+
+import (
+ "bytes"
+ "encoding/base64"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func b64encode(t *testing.T, input []byte) []byte {
+ var encoded bytes.Buffer
+ enc := base64.NewEncoder(base64.StdEncoding, &encoded)
+ _, err := enc.Write(input)
+ require.NoError(t, err)
+ require.NoError(t, enc.Close())
+ return encoded.Bytes()
+}
+
+func b64decode(t *testing.T, input []byte) []byte {
+ var decoded bytes.Buffer
+ dec := NewBase64Decoder(&decoded)
+ _, err := dec.Write(input)
+ require.NoError(t, err)
+ require.NoError(t, dec.Close())
+ return decoded.Bytes()
+}
+
+func FuzzBase64Decoder(f *testing.F) {
+ f.Fuzz(func(t *testing.T, input []byte) {
+ encoded := b64encode(t, input)
+ decoded := b64decode(t, encoded)
+ t.Logf("input b64 = %q", encoded)
+ t.Logf("expected decoded = %#v", input)
+ t.Logf("actual decoded = %#v", decoded)
+ if !bytes.Equal(input, decoded) {
+ t.Fail()
+ }
+ })
+}
diff --git a/internal/hex.go b/internal/hex.go
index 9ef78eb..62a818f 100644
--- a/internal/hex.go
+++ b/internal/hex.go
@@ -6,7 +6,7 @@ package internal
const Hex = "0123456789abcdef"
-func HexToInt[T interface{ byte | rune }](c T) (byte, bool) {
+func HexToInt(c rune) (byte, bool) {
switch {
case '0' <= c && c <= '9':
return byte(c) - '0', true
diff --git a/internal/parse.go b/internal/parse.go
index 895c930..bb849e7 100644
--- a/internal/parse.go
+++ b/internal/parse.go
@@ -70,9 +70,14 @@ const (
RuneTypeNullL2
RuneTypeEOF
+
+ // Not a real rune type, but used as a stack state.
+ runeTypeAny
)
// GoString implements fmt.GoStringer.
+//
+//nolint:dupl // False positive due to similarly shaped AST.
func (t RuneType) GoString() string {
str, ok := map[RuneType]string{
RuneTypeError: "RuneTypeError",
@@ -125,6 +130,8 @@ func (t RuneType) GoString() string {
RuneTypeNullL2: "RuneTypeNullL2",
RuneTypeEOF: "RuneTypeEOF",
+
+ runeTypeAny: "runeTypeAny",
}[t]
if ok {
return str
@@ -133,6 +140,8 @@ func (t RuneType) GoString() string {
}
// String implements fmt.Stringer.
+//
+//nolint:dupl // False positive due to similarly shaped AST.
func (t RuneType) String() string {
str, ok := map[RuneType]string{
RuneTypeError: "x",
@@ -148,7 +157,7 @@ func (t RuneType) String() string {
RuneTypeArrayComma: "a",
RuneTypeArrayEnd: "]",
- RuneTypeStringBeg: "“",
+ RuneTypeStringBeg: "\"",
RuneTypeStringChar: "c",
RuneTypeStringEsc: "\\",
RuneTypeStringEsc1: "b",
@@ -157,7 +166,7 @@ func (t RuneType) String() string {
RuneTypeStringEscUB: "B",
RuneTypeStringEscUC: "C",
RuneTypeStringEscUD: "D",
- RuneTypeStringEnd: "”",
+ RuneTypeStringEnd: "»",
RuneTypeNumberIntNeg: "-",
RuneTypeNumberIntZero: "0",
@@ -185,6 +194,8 @@ func (t RuneType) String() string {
RuneTypeNullL2: "Ⓛ", // +uppercase
RuneTypeEOF: "$",
+
+ runeTypeAny: "?",
}[t]
if ok {
return str
@@ -226,15 +237,19 @@ type Parser struct {
err error
closed bool
- // We reuse RuneTypes to store the stack. The base idea is
- // that, stack items are "the most recently read
- // stack-relevant RuneType".
+ // We reuse RuneTypes to store the stack. The base idea is:
+ // stack items are "the most recently read stack-relevant
+ // RuneType".
//
- // We treat RuneTypeError as a wildcard.
+ // The stack starts out with the special pseudo-RuneType
+ // `runeTypeAny` that means we're willing to accept any
+ // element type; an empty stack means that we have reached the
+ // end of the top-level element and should accept no more
+ // input except for whitespace.
//
- // The "normal"stack-relevant RuneTypes are:
+ // The "normal" stack-relevant RuneTypes are:
//
- // “\uABC for strings
+ // "\uABC for strings
// -01.2e+3 for numbers
// 𝕥𝕣𝕦 for "true"
// 𝔣𝔞𝔩𝔰 for "false"
@@ -244,8 +259,7 @@ type Parser struct {
// rule; they need some special assignments:
//
// { object: waiting for key to start or '}'
- // ” object: reading key / waiting for colon
- // : object: waiting for value to start
+ // » object: reading key / waiting for colon
// o object: reading value / waiting for ',' or '}'
//
// [ array: waiting for item to start or ']'
@@ -261,22 +275,22 @@ type Parser struct {
// The stack would be
//
// stack processed
- // x
+ // ?
// { {
- // ”“ {"
- // ”“ {"x
- // ” {"x"
- // : {"x":
- // o“ {"x":"
- // o“ {"x":"y
+ // »" {"
+ // »" {"x
+ // » {"x"
+ // o? {"x":
+ // o" {"x":"
+ // o" {"x":"y
// o {"x":"y"
// { {"x":"y",
- // ”“ {"x":"y","
- // ”“ {"x":"y","a
- // ” {"x":"y","a"
- // : {"x":"y","a":
- // o“ {"x":"y","a":"
- // o“ {"x":"y","a":"b
+ // »" {"x":"y","
+ // »" {"x":"y","a
+ // » {"x":"y","a"
+ // o? {"x":"y","a":
+ // o" {"x":"y","a":"
+ // o" {"x":"y","a":"b
// o {"x":"y","a":"b"
// {"x":"y","a":"b"}
stack []RuneType
@@ -286,10 +300,12 @@ func (par *Parser) pushState(state RuneType) RuneType {
par.stack = append(par.stack, state)
return state
}
+
// replaceState overwrites the top (last) entry of par.stack with state
// and returns state unchanged.  The stack must be non-empty: indexing
// an empty stack panics.
func (par *Parser) replaceState(state RuneType) RuneType {
par.stack[len(par.stack)-1] = state
return state
}
+
// popState removes the top (last) entry of par.stack by re-slicing it
// one element shorter.  The stack must be non-empty: re-slicing an
// empty stack panics.
func (par *Parser) popState() {
par.stack = par.stack[:len(par.stack)-1]
}
@@ -303,7 +319,7 @@ func (par *Parser) stackString() string {
}
func (par *Parser) StackIsEmpty() bool {
- return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == RuneTypeError)
+ return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny)
}
// Reset all Parser state.
@@ -337,7 +353,7 @@ func (par *Parser) HandleEOF() (RuneType, error) {
}
if !par.initialized {
par.initialized = true
- par.pushState(RuneTypeError)
+ par.pushState(runeTypeAny)
}
switch len(par.stack) {
case 0:
@@ -348,7 +364,7 @@ func (par *Parser) HandleEOF() (RuneType, error) {
if _, err := par.HandleRune('\n'); err == nil {
return RuneTypeEOF, nil
}
- case par.stack[0] == RuneTypeError:
+ case par.stack[0] == runeTypeAny:
par.err = io.EOF
return RuneTypeError, par.err
}
@@ -381,7 +397,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
}
if !par.initialized {
par.initialized = true
- par.pushState(RuneTypeError)
+ par.pushState(runeTypeAny)
}
if len(par.stack) == 0 {
switch c {
@@ -393,7 +409,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
}
switch par.stack[len(par.stack)-1] {
// any /////////////////////////////////////////////////////////////////////////////////////
- case RuneTypeError:
+ case runeTypeAny:
switch c {
case 0x0020, 0x000A, 0x000D, 0x0009:
return RuneTypeSpace, nil
@@ -444,7 +460,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeSpace, nil
case ':':
par.replaceState(RuneTypeObjectComma)
- par.pushState(RuneTypeError)
+ par.pushState(runeTypeAny)
return RuneTypeObjectColon, nil
default:
return RuneTypeError, fmt.Errorf("invalid character %q after object key", c)
@@ -472,7 +488,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeArrayEnd, nil
default:
par.replaceState(RuneTypeArrayComma)
- par.pushState(RuneTypeError)
+ par.pushState(runeTypeAny)
return par.HandleRune(c)
}
case RuneTypeArrayEnd: // waiting for item
@@ -481,7 +497,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeSpace, nil
default:
par.replaceState(RuneTypeArrayComma)
- par.pushState(RuneTypeError)
+ par.pushState(runeTypeAny)
return par.HandleRune(c)
}
case RuneTypeArrayComma: // waiting for ',' or ']'
@@ -583,8 +599,8 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
// H = ExpSign
// I = ExpDig
//
- // The 'A' state is part of the RuneTypeError "any" case
- // above, and the remainder follow:
+ // The 'A' state is part of the runeTypeAny case above, and
+ // the remainder follow:
case RuneTypeNumberIntNeg: // B
switch c {
case '0':