From 77f3c0d7cd21274d00984b72dfce05394d11bdd0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 3 Feb 2023 19:13:22 -0700 Subject: Move KMP IndexAll from diskio to rebuildmappings --- lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go | 149 +++++++++++++++++++++ .../btrfsinspect/rebuildmappings/kmp_test.go | 126 +++++++++++++++++ .../btrfsinspect/rebuildmappings/matchsums.go | 3 +- .../btrfsinspect/rebuildmappings/sumrunwithgaps.go | 5 +- ...6820e3f98383aebadd2af3a22aa248546a228b08451c30d | 3 + ...14e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 | 3 + ...293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 | 3 + ...3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 | 3 + ...7d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d | 3 + lib/diskio/kmp.go | 147 -------------------- lib/diskio/kmp_test.go | 124 ----------------- ...6820e3f98383aebadd2af3a22aa248546a228b08451c30d | 3 - ...14e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 | 3 - ...293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 | 3 - ...3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 | 3 - ...7d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d | 3 - 16 files changed, 293 insertions(+), 291 deletions(-) create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d delete mode 100644 lib/diskio/kmp.go delete mode 100644 lib/diskio/kmp_test.go delete mode 100644 lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d delete mode 100644 lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 delete mode 100644 lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 delete mode 100644 lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 delete mode 100644 lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d (limited to 'lib') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go new file mode 100644 index 0000000..eeaab0c --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go @@ -0,0 +1,149 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "errors" + "io" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +var ErrWildcard = errors.New("wildcard") + +func kmpEq2[K ~int64, V comparable](aS diskio.Sequence[K, V], aI K, bS diskio.Sequence[K, V], bI K) bool { + aV, aErr := aS.Get(aI) + bV, bErr := bS.Get(bI) + if aErr != nil { + //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. + if aErr == ErrWildcard || errors.Is(aErr, ErrWildcard) { + aV = bV + aErr = nil + } else { + panic(aErr) + } + } + if bErr != nil { + //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. + if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { + bV = aV + bErr = nil + } else { + panic(bErr) + } + } + if aErr != nil || bErr != nil { + return false + } + return aV == bV +} + +func kmpEq1[K ~int64, V comparable](aV V, bS diskio.Sequence[K, V], bI K) bool { + bV, bErr := bS.Get(bI) + if bErr != nil { + //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. + if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { + return true + } + panic(bErr) + } + return aV == bV +} + +// buildKMPTable takes the string 'substr', and returns a table such +// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and +// 'substr[:val] == substr[matchLen-val:matchLen]'. +func buildKMPTable[K ~int64, V comparable](substr diskio.Sequence[K, V]) ([]K, error) { + var substrLen K + for { + //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. + if _, err := substr.Get(substrLen); err != nil && !(err == ErrWildcard || errors.Is(err, ErrWildcard)) { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + substrLen++ + } + + table := make([]K, substrLen) + for j := K(0); j < substrLen; j++ { + if j == 0 { + // First entry must always be 0 (in order to + // satisfy 'val < matchLen'). + continue + } + val := table[j-1] + // not a match; go back + for val > 0 && !kmpEq2(substr, j, substr, val) { + val = table[val-1] + } + // is a match; go forward + if kmpEq2(substr, val, substr, j) { + val++ + } + table[j] = val + } + return table, nil +} + +// IndexAll returns the starting-position of all possibly-overlapping +// occurrences of 'substr' in the 'str' sequence. +// +// Will hop around in 'substr', but will only get the natural sequence +// [0...) in order from 'str'. When hopping around in 'substr' it +// assumes that once it has gotten a given index without error, it can +// continue to do so without error; errors appearing later will cause +// panics. +// +// Will panic if the length of 'substr' is 0. +// +// The 'substr' may include wildcard characters by returning +// ErrWildcard for a position. +// +// Uses the Knuth-Morris-Pratt algorithm. +func IndexAll[K ~int64, V comparable](str, substr diskio.Sequence[K, V]) ([]K, error) { + table, err := buildKMPTable(substr) + if err != nil { + return nil, err + } + substrLen := K(len(table)) + if substrLen == 0 { + panic(errors.New("rebuildmappings.IndexAll: empty substring")) + } + + var matches []K + var curMatchBeg K + var curMatchLen K + + for pos := K(0); ; pos++ { + chr, err := str.Get(pos) + if err != nil { + if errors.Is(err, io.EOF) { + err = nil + } + return matches, err + } + + // Consider 'chr' + for curMatchLen > 0 && !kmpEq1(chr, substr, curMatchLen) { // shorten the match + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + if kmpEq1(chr, substr, curMatchLen) { // lengthen the match + if curMatchLen == 0 { + curMatchBeg = pos + } + curMatchLen++ + if curMatchLen == substrLen { + matches = append(matches, curMatchBeg) + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + } + } +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go new file mode 100644 index 0000000..910452a --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go @@ -0,0 +1,126 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "bytes" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +func TestBuildKMPTable(t *testing.T) { + t.Parallel() + substr := diskio.SliceSequence[int64, byte]([]byte("ababaa")) + table, err := buildKMPTable[int64, byte](substr) + require.NoError(t, err) + require.Equal(t, + []int64{0, 0, 1, 2, 3, 1}, + table) + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } +} + +func FuzzBuildKMPTable(f *testing.F) { + f.Add([]byte("ababaa")) + f.Fuzz(func(t *testing.T, substr []byte) { + table, err := buildKMPTable[int64, byte](diskio.SliceSequence[int64, byte](substr)) + require.NoError(t, err) + require.Equal(t, len(substr), len(table), "length") + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } + }) +} + +func NaiveIndexAll(str, substr []byte) []int64 { + var matches []int64 + for i := range str { + if bytes.HasPrefix(str[i:], substr) { + matches = append(matches, int64(i)) + } + } + return matches +} + +func FuzzIndexAll(f *testing.F) { + f.Fuzz(func(t *testing.T, str, substr []byte) { + if len(substr) == 0 { + t.Skip() + } + t.Logf("str =%q", str) + t.Logf("substr=%q", substr) + exp := NaiveIndexAll(str, substr) + act, err := IndexAll[int64, byte]( + &diskio.ByteReaderSequence[int64]{R: bytes.NewReader(str)}, + diskio.SliceSequence[int64, byte](substr)) + assert.NoError(t, err) + assert.Equal(t, exp, act) + }) +} + +type RESeq string + +func (re RESeq) Get(i int64) (byte, error) { + if i < 0 || i >= int64(len(re)) { + return 0, io.EOF + } + chr := re[int(i)] + if chr == '.' { + return 0, ErrWildcard + } + return chr, nil +} + +func TestKMPWildcard(t *testing.T) { + t.Parallel() + type testcase struct { + InStr string + InSubstr string + ExpMatches []int64 + } + testcases := map[string]testcase{ + "trivial-bar": { + InStr: "foo_bar", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "trival-baz": { + InStr: "foo-baz", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "suffix": { + InStr: "foobarbaz", + InSubstr: "...baz", + ExpMatches: []int64{3}, + }, + "overlap": { + InStr: "foobarbar", + InSubstr: "...bar", + ExpMatches: []int64{0, 3}, + }, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + matches, err := IndexAll[int64, byte]( + diskio.StringSequence[int64](tc.InStr), + RESeq(tc.InSubstr)) + assert.NoError(t, err) + assert.Equal(t, tc.ExpMatches, matches) + }) + } +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go index c38314a..eda37bd 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go @@ -14,7 +14,6 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" "git.lukeshu.com/btrfs-progs-ng/lib/maps" ) @@ -37,7 +36,7 @@ func matchBlockGroupSums(ctx context.Context, var matches []btrfsvol.QualifiedPhysicalAddr if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error { - rawMatches, err := diskio.IndexAll[int64, btrfssum.ShortSum](region, bgRun) + rawMatches, err := IndexAll[int64, btrfssum.ShortSum](region, bgRun) if err != nil { return err } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go index 8c1f3ed..d1064d8 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go @@ -14,7 +14,6 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct { @@ -63,7 +62,7 @@ func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, error) } for _, run := range sg.Runs { if run.Addr > addr { - return "", diskio.ErrWildcard + return "", ErrWildcard } if run.Addr.Add(run.Size()) <= addr { continue @@ -71,7 +70,7 @@ func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, error) off := int((addr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize return run.Sums[off : off+run.ChecksumSize], nil } - return "", diskio.ErrWildcard + return "", ErrWildcard } func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, btrfssum.ShortSum) error) error { diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d new file mode 100644 index 0000000..9d14adf --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("1") diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 new file mode 100644 index 0000000..269f061 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("0") diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 new file mode 100644 index 0000000..b8f1562 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("") diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 new file mode 100644 index 0000000..be67506 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\xde\xdb!") +[]byte("\xde\xdb") diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d new file mode 100644 index 0000000..c3bfa37 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\x10\x10\x15") +[]byte("\x10\x15") diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go deleted file mode 100644 index 6949aa4..0000000 --- a/lib/diskio/kmp.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package diskio - -import ( - "errors" - "io" -) - -var ErrWildcard = errors.New("wildcard") - -func kmpEq2[K ~int64, V comparable](aS Sequence[K, V], aI K, bS Sequence[K, V], bI K) bool { - aV, aErr := aS.Get(aI) - bV, bErr := bS.Get(bI) - if aErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if aErr == ErrWildcard || errors.Is(aErr, ErrWildcard) { - aV = bV - aErr = nil - } else { - panic(aErr) - } - } - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - bV = aV - bErr = nil - } else { - panic(bErr) - } - } - if aErr != nil || bErr != nil { - return false - } - return aV == bV -} - -func kmpEq1[K ~int64, V comparable](aV V, bS Sequence[K, V], bI K) bool { - bV, bErr := bS.Get(bI) - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - return true - } - panic(bErr) - } - return aV == bV -} - -// buildKMPTable takes the string 'substr', and returns a table such -// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and -// 'substr[:val] == substr[matchLen-val:matchLen]'. -func buildKMPTable[K ~int64, V comparable](substr Sequence[K, V]) ([]K, error) { - var substrLen K - for { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if _, err := substr.Get(substrLen); err != nil && !(err == ErrWildcard || errors.Is(err, ErrWildcard)) { - if errors.Is(err, io.EOF) { - break - } - return nil, err - } - substrLen++ - } - - table := make([]K, substrLen) - for j := K(0); j < substrLen; j++ { - if j == 0 { - // First entry must always be 0 (in order to - // satisfy 'val < matchLen'). - continue - } - val := table[j-1] - // not a match; go back - for val > 0 && !kmpEq2(substr, j, substr, val) { - val = table[val-1] - } - // is a match; go forward - if kmpEq2(substr, val, substr, j) { - val++ - } - table[j] = val - } - return table, nil -} - -// IndexAll returns the starting-position of all possibly-overlapping -// occurrences of 'substr' in the 'str' sequence. -// -// Will hop around in 'substr', but will only get the natural sequence -// [0...) in order from 'str'. When hopping around in 'substr' it -// assumes that once it has gotten a given index without error, it can -// continue to do so without error; errors appearing later will cause -// panics. -// -// Will panic if the length of 'substr' is 0. -// -// The 'substr' may include wildcard characters by returning -// ErrWildcard for a position. -// -// Uses the Knuth-Morris-Pratt algorithm. -func IndexAll[K ~int64, V comparable](str, substr Sequence[K, V]) ([]K, error) { - table, err := buildKMPTable(substr) - if err != nil { - return nil, err - } - substrLen := K(len(table)) - if substrLen == 0 { - panic(errors.New("diskio.IndexAll: empty substring")) - } - - var matches []K - var curMatchBeg K - var curMatchLen K - - for pos := K(0); ; pos++ { - chr, err := str.Get(pos) - if err != nil { - if errors.Is(err, io.EOF) { - err = nil - } - return matches, err - } - - // Consider 'chr' - for curMatchLen > 0 && !kmpEq1(chr, substr, curMatchLen) { // shorten the match - overlap := table[curMatchLen-1] - curMatchBeg += curMatchLen - overlap - curMatchLen = overlap - } - if kmpEq1(chr, substr, curMatchLen) { // lengthen the match - if curMatchLen == 0 { - curMatchBeg = pos - } - curMatchLen++ - if curMatchLen == substrLen { - matches = append(matches, curMatchBeg) - overlap := table[curMatchLen-1] - curMatchBeg += curMatchLen - overlap - curMatchLen = overlap - } - } - } -} diff --git a/lib/diskio/kmp_test.go b/lib/diskio/kmp_test.go deleted file mode 100644 index 4d4b3be..0000000 --- a/lib/diskio/kmp_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package diskio - -import ( - "bytes" - "io" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestBuildKMPTable(t *testing.T) { - t.Parallel() - substr := SliceSequence[int64, byte]([]byte("ababaa")) - table, err := buildKMPTable[int64, byte](substr) - require.NoError(t, err) - require.Equal(t, - []int64{0, 0, 1, 2, 3, 1}, - table) - for j, val := range table { - matchLen := j + 1 - assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], - "for table[%d]=%d", j, val) - } -} - -func FuzzBuildKMPTable(f *testing.F) { - f.Add([]byte("ababaa")) - f.Fuzz(func(t *testing.T, substr []byte) { - table, err := buildKMPTable[int64, byte](SliceSequence[int64, byte](substr)) - require.NoError(t, err) - require.Equal(t, len(substr), len(table), "length") - for j, val := range table { - matchLen := j + 1 - assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], - "for table[%d]=%d", j, val) - } - }) -} - -func NaiveIndexAll(str, substr []byte) []int64 { - var matches []int64 - for i := range str { - if bytes.HasPrefix(str[i:], substr) { - matches = append(matches, int64(i)) - } - } - return matches -} - -func FuzzIndexAll(f *testing.F) { - f.Fuzz(func(t *testing.T, str, substr []byte) { - if len(substr) == 0 { - t.Skip() - } - t.Logf("str =%q", str) - t.Logf("substr=%q", substr) - exp := NaiveIndexAll(str, substr) - act, err := IndexAll[int64, byte]( - &ByteReaderSequence[int64]{R: bytes.NewReader(str)}, - SliceSequence[int64, byte](substr)) - assert.NoError(t, err) - assert.Equal(t, exp, act) - }) -} - -type RESeq string - -func (re RESeq) Get(i int64) (byte, error) { - if i < 0 || i >= int64(len(re)) { - return 0, io.EOF - } - chr := re[int(i)] - if chr == '.' { - return 0, ErrWildcard - } - return chr, nil -} - -func TestKMPWildcard(t *testing.T) { - t.Parallel() - type testcase struct { - InStr string - InSubstr string - ExpMatches []int64 - } - testcases := map[string]testcase{ - "trivial-bar": { - InStr: "foo_bar", - InSubstr: "foo.ba.", - ExpMatches: []int64{0}, - }, - "trival-baz": { - InStr: "foo-baz", - InSubstr: "foo.ba.", - ExpMatches: []int64{0}, - }, - "suffix": { - InStr: "foobarbaz", - InSubstr: "...baz", - ExpMatches: []int64{3}, - }, - "overlap": { - InStr: "foobarbar", - InSubstr: "...bar", - ExpMatches: []int64{0, 3}, - }, - } - for tcName, tc := range testcases { - tc := tc - t.Run(tcName, func(t *testing.T) { - t.Parallel() - matches, err := IndexAll[int64, byte]( - StringSequence[int64](tc.InStr), - RESeq(tc.InSubstr)) - assert.NoError(t, err) - assert.Equal(t, tc.ExpMatches, matches) - }) - } -} diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d deleted file mode 100644 index 9d14adf..0000000 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("1") diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 deleted file mode 100644 index 269f061..0000000 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("0") diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 deleted file mode 100644 index b8f1562..0000000 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("0") -[]byte("") diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 deleted file mode 100644 index be67506..0000000 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("\xde\xdb!") -[]byte("\xde\xdb") diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d deleted file mode 100644 index c3bfa37..0000000 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d +++ /dev/null @@ -1,3 +0,0 @@ -go test fuzz v1 -[]byte("\x10\x10\x15") -[]byte("\x10\x15") -- cgit v1.2.3-2-g168b