From 77f3c0d7cd21274d00984b72dfce05394d11bdd0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 3 Feb 2023 19:13:22 -0700 Subject: Move KMP IndexAll from diskio to rebuildmappings --- .../btrfsinspect/rebuildmappings/kmp_test.go | 126 +++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go (limited to 'lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go new file mode 100644 index 0000000..910452a --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go @@ -0,0 +1,126 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "bytes" + "io" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +func TestBuildKMPTable(t *testing.T) { + t.Parallel() + substr := diskio.SliceSequence[int64, byte]([]byte("ababaa")) + table, err := buildKMPTable[int64, byte](substr) + require.NoError(t, err) + require.Equal(t, + []int64{0, 0, 1, 2, 3, 1}, + table) + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } +} + +func FuzzBuildKMPTable(f *testing.F) { + f.Add([]byte("ababaa")) + f.Fuzz(func(t *testing.T, substr []byte) { + table, err := buildKMPTable[int64, byte](diskio.SliceSequence[int64, byte](substr)) + require.NoError(t, err) + require.Equal(t, len(substr), len(table), "length") + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } + }) +} + +func NaiveIndexAll(str, substr []byte) []int64 { + var matches []int64 + for i := range str { + if bytes.HasPrefix(str[i:], substr) { + matches = append(matches, int64(i)) + } + } + return matches +} + +func FuzzIndexAll(f *testing.F) { + f.Fuzz(func(t *testing.T, str, substr []byte) { + if len(substr) == 0 { + t.Skip() + } + t.Logf("str =%q", str) + t.Logf("substr=%q", substr) + exp := NaiveIndexAll(str, substr) + act, err := IndexAll[int64, byte]( + &diskio.ByteReaderSequence[int64]{R: bytes.NewReader(str)}, + diskio.SliceSequence[int64, byte](substr)) + assert.NoError(t, err) + assert.Equal(t, exp, act) + }) +} + +type RESeq string + +func (re RESeq) Get(i int64) (byte, error) { + if i < 0 || i >= int64(len(re)) { + return 0, io.EOF + } + chr := re[int(i)] + if chr == '.' { + return 0, ErrWildcard + } + return chr, nil +} + +func TestKMPWildcard(t *testing.T) { + t.Parallel() + type testcase struct { + InStr string + InSubstr string + ExpMatches []int64 + } + testcases := map[string]testcase{ + "trivial-bar": { + InStr: "foo_bar", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "trival-baz": { + InStr: "foo-baz", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "suffix": { + InStr: "foobarbaz", + InSubstr: "...baz", + ExpMatches: []int64{3}, + }, + "overlap": { + InStr: "foobarbar", + InSubstr: "...bar", + ExpMatches: []int64{0, 3}, + }, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + matches, err := IndexAll[int64, byte]( + diskio.StringSequence[int64](tc.InStr), + RESeq(tc.InSubstr)) + assert.NoError(t, err) + assert.Equal(t, tc.ExpMatches, matches) + }) + } +} -- cgit v1.2.3-2-g168b From ef60daef395b20b67042c011f5b2a1131049e275 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 3 Feb 2023 19:50:35 -0700 Subject: rebuildmappings: Optimize the KMP search --- .../btrfsinspect/rebuildmappings/kmp_test.go | 52 +++++++++++----------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go index 910452a..acec9b8 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go @@ -6,7 +6,6 @@ package rebuildmappings import ( "bytes" - "io" "testing" "github.com/stretchr/testify/assert" @@ -15,11 +14,28 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) +type bytePattern[K ~int64 | ~int] []byte + +var _ kmpPattern[int, byte] = bytePattern[int]{} + +// PatLen implements kmpPattern. +func (s bytePattern[K]) PatLen() K { + return K(len(s)) +} + +// PatGet implements kmpPattern. +func (s bytePattern[K]) PatGet(i K) (byte, bool) { + chr := s[int(i)] + if chr == '.' { + return 0, false + } + return chr, true +} + func TestBuildKMPTable(t *testing.T) { t.Parallel() - substr := diskio.SliceSequence[int64, byte]([]byte("ababaa")) - table, err := buildKMPTable[int64, byte](substr) - require.NoError(t, err) + substr := bytePattern[int64]([]byte("ababaa")) + table := buildKMPTable[int64, byte](substr) require.Equal(t, []int64{0, 0, 1, 2, 3, 1}, table) @@ -33,8 +49,7 @@ func TestBuildKMPTable(t *testing.T) { func FuzzBuildKMPTable(f *testing.F) { f.Add([]byte("ababaa")) f.Fuzz(func(t *testing.T, substr []byte) { - table, err := buildKMPTable[int64, byte](diskio.SliceSequence[int64, byte](substr)) - require.NoError(t, err) + table := buildKMPTable[int64, byte](bytePattern[int64](substr)) require.Equal(t, len(substr), len(table), "length") for j, val := range table { matchLen := j + 1 @@ -62,27 +77,13 @@ func FuzzIndexAll(f *testing.F) { t.Logf("str =%q", str) t.Logf("substr=%q", substr) exp := NaiveIndexAll(str, substr) - act, err := IndexAll[int64, byte]( - &diskio.ByteReaderSequence[int64]{R: bytes.NewReader(str)}, - diskio.SliceSequence[int64, byte](substr)) - assert.NoError(t, err) + act := indexAll[int64, byte]( + diskio.SliceSequence[int64, byte](str), + bytePattern[int64](substr)) assert.Equal(t, exp, act) }) } -type RESeq string - -func (re RESeq) Get(i int64) (byte, error) { - if i < 0 || i >= int64(len(re)) { - return 0, io.EOF - } - chr := re[int(i)] - if chr == '.' { - return 0, ErrWildcard - } - return chr, nil -} - func TestKMPWildcard(t *testing.T) { t.Parallel() type testcase struct { @@ -116,10 +117,9 @@ func TestKMPWildcard(t *testing.T) { tc := tc t.Run(tcName, func(t *testing.T) { t.Parallel() - matches, err := IndexAll[int64, byte]( + matches := indexAll[int64, byte]( diskio.StringSequence[int64](tc.InStr), - RESeq(tc.InSubstr)) - assert.NoError(t, err) + bytePattern[int64](tc.InSubstr)) assert.Equal(t, tc.ExpMatches, matches) }) } -- cgit v1.2.3-2-g168b