summaryrefslogtreecommitdiff
path: root/lib/diskio
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diskio')
-rw-r--r--lib/diskio/kmp.go147
-rw-r--r--lib/diskio/kmp_test.go124
-rw-r--r--lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d3
-rw-r--r--lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e6003
-rw-r--r--lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd603
-rw-r--r--lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e419537383
-rw-r--r--lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d3
7 files changed, 0 insertions, 286 deletions
diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go
deleted file mode 100644
index 6949aa4..0000000
--- a/lib/diskio/kmp.go
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package diskio
-
-import (
- "errors"
- "io"
-)
-
-var ErrWildcard = errors.New("wildcard")
-
-func kmpEq2[K ~int64, V comparable](aS Sequence[K, V], aI K, bS Sequence[K, V], bI K) bool {
- aV, aErr := aS.Get(aI)
- bV, bErr := bS.Get(bI)
- if aErr != nil {
- //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
- if aErr == ErrWildcard || errors.Is(aErr, ErrWildcard) {
- aV = bV
- aErr = nil
- } else {
- panic(aErr)
- }
- }
- if bErr != nil {
- //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
- if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) {
- bV = aV
- bErr = nil
- } else {
- panic(bErr)
- }
- }
- if aErr != nil || bErr != nil {
- return false
- }
- return aV == bV
-}
-
-func kmpEq1[K ~int64, V comparable](aV V, bS Sequence[K, V], bI K) bool {
- bV, bErr := bS.Get(bI)
- if bErr != nil {
- //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
- if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) {
- return true
- }
- panic(bErr)
- }
- return aV == bV
-}
-
-// buildKMPTable takes the string 'substr', and returns a table such
-// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and
-// 'substr[:val] == substr[matchLen-val:matchLen]'.
-func buildKMPTable[K ~int64, V comparable](substr Sequence[K, V]) ([]K, error) {
- var substrLen K
- for {
- //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
- if _, err := substr.Get(substrLen); err != nil && !(err == ErrWildcard || errors.Is(err, ErrWildcard)) {
- if errors.Is(err, io.EOF) {
- break
- }
- return nil, err
- }
- substrLen++
- }
-
- table := make([]K, substrLen)
- for j := K(0); j < substrLen; j++ {
- if j == 0 {
- // First entry must always be 0 (in order to
- // satisfy 'val < matchLen').
- continue
- }
- val := table[j-1]
- // not a match; go back
- for val > 0 && !kmpEq2(substr, j, substr, val) {
- val = table[val-1]
- }
- // is a match; go forward
- if kmpEq2(substr, val, substr, j) {
- val++
- }
- table[j] = val
- }
- return table, nil
-}
-
-// IndexAll returns the starting-position of all possibly-overlapping
-// occurrences of 'substr' in the 'str' sequence.
-//
-// Will hop around in 'substr', but will only get the natural sequence
-// [0...) in order from 'str'. When hopping around in 'substr' it
-// assumes that once it has gotten a given index without error, it can
-// continue to do so without error; errors appearing later will cause
-// panics.
-//
-// Will panic if the length of 'substr' is 0.
-//
-// The 'substr' may include wildcard characters by returning
-// ErrWildcard for a position.
-//
-// Uses the Knuth-Morris-Pratt algorithm.
-func IndexAll[K ~int64, V comparable](str, substr Sequence[K, V]) ([]K, error) {
- table, err := buildKMPTable(substr)
- if err != nil {
- return nil, err
- }
- substrLen := K(len(table))
- if substrLen == 0 {
- panic(errors.New("diskio.IndexAll: empty substring"))
- }
-
- var matches []K
- var curMatchBeg K
- var curMatchLen K
-
- for pos := K(0); ; pos++ {
- chr, err := str.Get(pos)
- if err != nil {
- if errors.Is(err, io.EOF) {
- err = nil
- }
- return matches, err
- }
-
- // Consider 'chr'
- for curMatchLen > 0 && !kmpEq1(chr, substr, curMatchLen) { // shorten the match
- overlap := table[curMatchLen-1]
- curMatchBeg += curMatchLen - overlap
- curMatchLen = overlap
- }
- if kmpEq1(chr, substr, curMatchLen) { // lengthen the match
- if curMatchLen == 0 {
- curMatchBeg = pos
- }
- curMatchLen++
- if curMatchLen == substrLen {
- matches = append(matches, curMatchBeg)
- overlap := table[curMatchLen-1]
- curMatchBeg += curMatchLen - overlap
- curMatchLen = overlap
- }
- }
- }
-}
diff --git a/lib/diskio/kmp_test.go b/lib/diskio/kmp_test.go
deleted file mode 100644
index 4d4b3be..0000000
--- a/lib/diskio/kmp_test.go
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package diskio
-
-import (
- "bytes"
- "io"
- "testing"
-
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/require"
-)
-
-func TestBuildKMPTable(t *testing.T) {
- t.Parallel()
- substr := SliceSequence[int64, byte]([]byte("ababaa"))
- table, err := buildKMPTable[int64, byte](substr)
- require.NoError(t, err)
- require.Equal(t,
- []int64{0, 0, 1, 2, 3, 1},
- table)
- for j, val := range table {
- matchLen := j + 1
- assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen],
- "for table[%d]=%d", j, val)
- }
-}
-
-func FuzzBuildKMPTable(f *testing.F) {
- f.Add([]byte("ababaa"))
- f.Fuzz(func(t *testing.T, substr []byte) {
- table, err := buildKMPTable[int64, byte](SliceSequence[int64, byte](substr))
- require.NoError(t, err)
- require.Equal(t, len(substr), len(table), "length")
- for j, val := range table {
- matchLen := j + 1
- assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen],
- "for table[%d]=%d", j, val)
- }
- })
-}
-
-func NaiveIndexAll(str, substr []byte) []int64 {
- var matches []int64
- for i := range str {
- if bytes.HasPrefix(str[i:], substr) {
- matches = append(matches, int64(i))
- }
- }
- return matches
-}
-
-func FuzzIndexAll(f *testing.F) {
- f.Fuzz(func(t *testing.T, str, substr []byte) {
- if len(substr) == 0 {
- t.Skip()
- }
- t.Logf("str =%q", str)
- t.Logf("substr=%q", substr)
- exp := NaiveIndexAll(str, substr)
- act, err := IndexAll[int64, byte](
- &ByteReaderSequence[int64]{R: bytes.NewReader(str)},
- SliceSequence[int64, byte](substr))
- assert.NoError(t, err)
- assert.Equal(t, exp, act)
- })
-}
-
-type RESeq string
-
-func (re RESeq) Get(i int64) (byte, error) {
- if i < 0 || i >= int64(len(re)) {
- return 0, io.EOF
- }
- chr := re[int(i)]
- if chr == '.' {
- return 0, ErrWildcard
- }
- return chr, nil
-}
-
-func TestKMPWildcard(t *testing.T) {
- t.Parallel()
- type testcase struct {
- InStr string
- InSubstr string
- ExpMatches []int64
- }
- testcases := map[string]testcase{
- "trivial-bar": {
- InStr: "foo_bar",
- InSubstr: "foo.ba.",
- ExpMatches: []int64{0},
- },
- "trival-baz": {
- InStr: "foo-baz",
- InSubstr: "foo.ba.",
- ExpMatches: []int64{0},
- },
- "suffix": {
- InStr: "foobarbaz",
- InSubstr: "...baz",
- ExpMatches: []int64{3},
- },
- "overlap": {
- InStr: "foobarbar",
- InSubstr: "...bar",
- ExpMatches: []int64{0, 3},
- },
- }
- for tcName, tc := range testcases {
- tc := tc
- t.Run(tcName, func(t *testing.T) {
- t.Parallel()
- matches, err := IndexAll[int64, byte](
- StringSequence[int64](tc.InStr),
- RESeq(tc.InSubstr))
- assert.NoError(t, err)
- assert.Equal(t, tc.ExpMatches, matches)
- })
- }
-}
diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
deleted file mode 100644
index 9d14adf..0000000
--- a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
+++ /dev/null
@@ -1,3 +0,0 @@
-go test fuzz v1
-[]byte("0")
-[]byte("1")
diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
deleted file mode 100644
index 269f061..0000000
--- a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
+++ /dev/null
@@ -1,3 +0,0 @@
-go test fuzz v1
-[]byte("0")
-[]byte("0")
diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
deleted file mode 100644
index b8f1562..0000000
--- a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
+++ /dev/null
@@ -1,3 +0,0 @@
-go test fuzz v1
-[]byte("0")
-[]byte("")
diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
deleted file mode 100644
index be67506..0000000
--- a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
+++ /dev/null
@@ -1,3 +0,0 @@
-go test fuzz v1
-[]byte("\xde\xdb!")
-[]byte("\xde\xdb")
diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
deleted file mode 100644
index c3bfa37..0000000
--- a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
+++ /dev/null
@@ -1,3 +0,0 @@
-go test fuzz v1
-[]byte("\x10\x10\x15")
-[]byte("\x10\x15")