summary | refs | log | tree | commit | diff
path: root/lib/btrfsprogs/btrfsinspect
diff options
context:
space:
mode:
Diffstat (limited to 'lib/btrfsprogs/btrfsinspect')
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go | 149
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go | 126
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go | 3
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go | 5
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d | 3
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 | 3
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 | 3
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 | 3
-rw-r--r-- lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d | 3
9 files changed, 293 insertions, 5 deletions
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go
new file mode 100644
index 0000000..eeaab0c
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go
@@ -0,0 +1,149 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "errors"
+ "io"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+)
+
+// ErrWildcard is the sentinel error that a sequence's Get method
+// returns for a position that should be treated as a wildcard.  The
+// KMP helpers in this file recognize it (directly or wrapped, via
+// errors.Is) and consider such a position equal to any value.
+var ErrWildcard = errors.New("wildcard")
+
+// kmpEq2 reports whether element aI of aS and element bI of bS are
+// to be considered equal for the purposes of KMP matching.
+//
+// A position whose Get returns ErrWildcard (directly or wrapped) is
+// equal to anything, including another wildcard.  Any other error
+// from Get causes a panic with that error; if both lookups fail that
+// way, the error from aS is the one that is raised.
+func kmpEq2[K ~int64, V comparable](aS diskio.Sequence[K, V], aI K, bS diskio.Sequence[K, V], bI K) bool {
+	aV, aErr := aS.Get(aI)
+	bV, bErr := bS.Get(bI)
+
+	aWild := false
+	if aErr != nil {
+		//nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
+		if !(aErr == ErrWildcard || errors.Is(aErr, ErrWildcard)) {
+			panic(aErr)
+		}
+		aWild = true
+	}
+
+	bWild := false
+	if bErr != nil {
+		//nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
+		if !(bErr == ErrWildcard || errors.Is(bErr, ErrWildcard)) {
+			panic(bErr)
+		}
+		bWild = true
+	}
+
+	// Every error has either been recognized as a wildcard or has
+	// panicked by now, so there is no "leftover error" case to
+	// handle here.
+	return aWild || bWild || aV == bV
+}
+
+// kmpEq1 reports whether the already-fetched value aV is to be
+// considered equal to element bI of bS.
+//
+// If bS reports ErrWildcard (directly or wrapped) for that position
+// then it matches any aV.  Any other error from bS.Get causes a
+// panic with that error.
+func kmpEq1[K ~int64, V comparable](aV V, bS diskio.Sequence[K, V], bI K) bool {
+	bV, bErr := bS.Get(bI)
+	if bErr == nil {
+		return aV == bV
+	}
+	//nolint:errorlint // The != is just a fast-path; we still fall back to errors.Is.
+	if bErr != ErrWildcard && !errors.Is(bErr, ErrWildcard) {
+		panic(bErr)
+	}
+	return true
+}
+
+// buildKMPTable computes the Knuth-Morris-Pratt failure table for
+// the sequence 'substr'.
+//
+// For each 'matchLen' in [1, len(substr)], 'table[matchLen-1]' is
+// the largest 'val' with 'val < matchLen' such that
+// 'substr[:val] == substr[matchLen-val:matchLen]'.  Elements are
+// compared with kmpEq2, so a position that reports ErrWildcard
+// compares equal to anything.
+//
+// The length of 'substr' is discovered by calling Get on increasing
+// indexes until it reports io.EOF; any other non-wildcard error
+// encountered while doing so is returned.
+func buildKMPTable[K ~int64, V comparable](substr diskio.Sequence[K, V]) ([]K, error) {
+	// Probe for the length of the sequence.
+	var n K
+	for {
+		_, err := substr.Get(n)
+		//nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is.
+		if err == nil || err == ErrWildcard || errors.Is(err, ErrWildcard) {
+			n++
+			continue
+		}
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		return nil, err
+	}
+
+	// table[0] stays at its zero value; that is the only value
+	// that satisfies 'val < matchLen' for matchLen=1.
+	table := make([]K, n)
+	for j := K(1); j < n; j++ {
+		val := table[j-1]
+		// While the candidate border cannot be extended by
+		// substr[j], fall back to the next-shorter border.
+		for val > 0 && !kmpEq2(substr, j, substr, val) {
+			val = table[val-1]
+		}
+		// If the surviving border can be extended, extend it.
+		if kmpEq2(substr, val, substr, j) {
+			val++
+		}
+		table[j] = val
+	}
+	return table, nil
+}
+
+// IndexAll searches the 'str' sequence for 'substr' using the
+// Knuth-Morris-Pratt algorithm, and returns the starting position of
+// every occurrence; occurrences may overlap one another.
+//
+// Elements of 'str' are fetched once each, in the natural order
+// [0...), until 'str' reports io.EOF.  Elements of 'substr' are
+// fetched repeatedly and out of order; once a 'substr' index has
+// been fetched without error it is assumed to remain fetchable, and
+// an error that shows up later causes a panic.
+//
+// A position in 'substr' may act as a wildcard by returning
+// ErrWildcard.
+//
+// Panics if the length of 'substr' is 0.  An error hit while
+// measuring 'substr' is returned with nil matches; an error from
+// 'str' other than io.EOF is returned along with the matches found
+// up to that point.
+func IndexAll[K ~int64, V comparable](str, substr diskio.Sequence[K, V]) ([]K, error) {
+	table, err := buildKMPTable(substr)
+	if err != nil {
+		return nil, err
+	}
+	patLen := K(len(table))
+	if patLen == 0 {
+		panic(errors.New("rebuildmappings.IndexAll: empty substring"))
+	}
+
+	var (
+		matches  []K
+		matchBeg K // where in 'str' the current partial match starts
+		matchLen K // how many elements of 'substr' are currently matched
+	)
+	for pos := K(0); ; pos++ {
+		chr, err := str.Get(pos)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				return matches, nil
+			}
+			return matches, err
+		}
+
+		// Shrink the partial match until 'chr' can extend it
+		// (or until there is nothing left to shrink).
+		for matchLen > 0 && !kmpEq1(chr, substr, matchLen) {
+			keep := table[matchLen-1]
+			matchBeg += matchLen - keep
+			matchLen = keep
+		}
+		if !kmpEq1(chr, substr, matchLen) {
+			continue
+		}
+
+		// 'chr' extends the partial match by one element.
+		if matchLen == 0 {
+			matchBeg = pos
+		}
+		matchLen++
+		if matchLen != patLen {
+			continue
+		}
+
+		// Full match; record it, then fall back to the longest
+		// border so that overlapping matches are still found.
+		matches = append(matches, matchBeg)
+		keep := table[matchLen-1]
+		matchBeg += matchLen - keep
+		matchLen = keep
+	}
+}
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go
new file mode 100644
index 0000000..910452a
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go
@@ -0,0 +1,126 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "bytes"
+ "io"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+)
+
+// TestBuildKMPTable checks buildKMPTable against a hand-computed
+// table for "ababaa", and then re-checks the prefix==suffix property
+// that each table entry is supposed to describe.
+func TestBuildKMPTable(t *testing.T) {
+	t.Parallel()
+	pattern := diskio.SliceSequence[int64, byte]([]byte("ababaa"))
+	table, err := buildKMPTable[int64, byte](pattern)
+	require.NoError(t, err)
+	require.Equal(t,
+		[]int64{0, 0, 1, 2, 3, 1},
+		table)
+	for idx := 0; idx < len(table); idx++ {
+		val := table[idx]
+		end := idx + 1
+		prefix := pattern[:val]
+		suffix := pattern[end-int(val) : end]
+		assert.Equalf(t, prefix, suffix,
+			"for table[%d]=%d", idx, val)
+	}
+}
+
+// FuzzBuildKMPTable feeds arbitrary byte strings to buildKMPTable
+// and checks that the result has one entry per input byte and that
+// each entry names a prefix that equals the corresponding suffix.
+func FuzzBuildKMPTable(f *testing.F) {
+	f.Add([]byte("ababaa"))
+	f.Fuzz(func(t *testing.T, substr []byte) {
+		seq := diskio.SliceSequence[int64, byte](substr)
+		table, err := buildKMPTable[int64, byte](seq)
+		require.NoError(t, err)
+		require.Equal(t, len(substr), len(table), "length")
+		for idx := 0; idx < len(table); idx++ {
+			val := table[idx]
+			end := idx + 1
+			prefix := substr[:val]
+			suffix := substr[end-int(val) : end]
+			assert.Equalf(t, prefix, suffix,
+				"for table[%d]=%d", idx, val)
+		}
+	})
+}
+
+// NaiveIndexAll is a brute-force reference implementation that
+// returns every offset in 'str' at which 'substr' occurs, by testing
+// each offset in turn.  It returns a nil slice if there are no
+// occurrences.
+func NaiveIndexAll(str, substr []byte) []int64 {
+	var found []int64
+	for off := 0; off < len(str); off++ {
+		tail := str[off:]
+		if len(tail) >= len(substr) && bytes.Equal(tail[:len(substr)], substr) {
+			found = append(found, int64(off))
+		}
+	}
+	return found
+}
+
+// FuzzIndexAll checks that IndexAll agrees with the brute-force
+// NaiveIndexAll on arbitrary inputs.  Inputs with an empty 'substr'
+// are skipped.
+func FuzzIndexAll(f *testing.F) {
+	f.Fuzz(func(t *testing.T, str, substr []byte) {
+		if len(substr) == 0 {
+			t.Skip()
+		}
+		t.Logf("str =%q", str)
+		t.Logf("substr=%q", substr)
+
+		want := NaiveIndexAll(str, substr)
+
+		haystack := &diskio.ByteReaderSequence[int64]{R: bytes.NewReader(str)}
+		needle := diskio.SliceSequence[int64, byte](substr)
+		got, err := IndexAll[int64, byte](haystack, needle)
+
+		assert.NoError(t, err)
+		assert.Equal(t, want, got)
+	})
+}
+
+// RESeq is a string used as a test pattern sequence in which each
+// '.' byte stands for a wildcard position.
+type RESeq string
+
+// Get returns the byte at index i.  It returns io.EOF if i is out of
+// range, and ErrWildcard if the byte at i is '.'.
+func (re RESeq) Get(i int64) (byte, error) {
+	switch {
+	case i < 0, i >= int64(len(re)):
+		return 0, io.EOF
+	case re[i] == '.':
+		return 0, ErrWildcard
+	default:
+		return re[i], nil
+	}
+}
+
+// TestKMPWildcard runs IndexAll with patterns that contain wildcard
+// positions (written as '.' and served through RESeq), and checks
+// the reported match offsets, including overlapping matches.
+func TestKMPWildcard(t *testing.T) {
+	t.Parallel()
+	type testcase struct {
+		InStr      string
+		InSubstr   string
+		ExpMatches []int64
+	}
+	testcases := map[string]testcase{
+		"trivial-bar": {
+			InStr:      "foo_bar",
+			InSubstr:   "foo.ba.",
+			ExpMatches: []int64{0},
+		},
+		// Spelled "trivial" (not "trival") so that
+		// `-run TestKMPWildcard/trivial` selects both trivial cases.
+		"trivial-baz": {
+			InStr:      "foo-baz",
+			InSubstr:   "foo.ba.",
+			ExpMatches: []int64{0},
+		},
+		"suffix": {
+			InStr:      "foobarbaz",
+			InSubstr:   "...baz",
+			ExpMatches: []int64{3},
+		},
+		"overlap": {
+			InStr:      "foobarbar",
+			InSubstr:   "...bar",
+			ExpMatches: []int64{0, 3},
+		},
+	}
+	for tcName, tc := range testcases {
+		tc := tc // capture a per-iteration copy for the parallel subtest
+		t.Run(tcName, func(t *testing.T) {
+			t.Parallel()
+			matches, err := IndexAll[int64, byte](
+				diskio.StringSequence[int64](tc.InStr),
+				RESeq(tc.InSubstr))
+			assert.NoError(t, err)
+			assert.Equal(t, tc.ExpMatches, matches)
+		})
+	}
+}
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go
index c38314a..eda37bd 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go
@@ -14,7 +14,6 @@ import (
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
"git.lukeshu.com/btrfs-progs-ng/lib/containers"
- "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
"git.lukeshu.com/btrfs-progs-ng/lib/maps"
)
@@ -37,7 +36,7 @@ func matchBlockGroupSums(ctx context.Context,
var matches []btrfsvol.QualifiedPhysicalAddr
if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error {
- rawMatches, err := diskio.IndexAll[int64, btrfssum.ShortSum](region, bgRun)
+ rawMatches, err := IndexAll[int64, btrfssum.ShortSum](region, bgRun)
if err != nil {
return err
}
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go
index 8c1f3ed..d1064d8 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go
@@ -14,7 +14,6 @@ import (
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
- "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
)
type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct {
@@ -63,7 +62,7 @@ func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, error)
}
for _, run := range sg.Runs {
if run.Addr > addr {
- return "", diskio.ErrWildcard
+ return "", ErrWildcard
}
if run.Addr.Add(run.Size()) <= addr {
continue
@@ -71,7 +70,7 @@ func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, error)
off := int((addr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
return run.Sums[off : off+run.ChecksumSize], nil
}
- return "", diskio.ErrWildcard
+ return "", ErrWildcard
}
func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, btrfssum.ShortSum) error) error {
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
new file mode 100644
index 0000000..9d14adf
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("1")
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
new file mode 100644
index 0000000..269f061
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("0")
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
new file mode 100644
index 0000000..b8f1562
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("")
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
new file mode 100644
index 0000000..be67506
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\xde\xdb!")
+[]byte("\xde\xdb")
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
new file mode 100644
index 0000000..c3bfa37
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\x10\x10\x15")
+[]byte("\x10\x15")