summaryrefslogtreecommitdiff
path: root/lib/diskio/kmp_test.go
blob: 4d4b3bef838b5add4525430e52f6d5850cfb2857 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package diskio

import (
	"bytes"
	"io"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestBuildKMPTable runs buildKMPTable on the fixed pattern "ababaa" and
// checks the result two ways: against a hard-coded expected table, and by
// confirming for every entry that the first table[j] elements of the pattern
// equal the table[j] elements ending at index j.
func TestBuildKMPTable(t *testing.T) {
	t.Parallel()

	pattern := SliceSequence[int64, byte]([]byte("ababaa"))
	expected := []int64{0, 0, 1, 2, 3, 1}

	table, err := buildKMPTable[int64, byte](pattern)
	require.NoError(t, err)
	require.Equal(t, expected, table)

	for idx, prefixLen := range table {
		// The window [start, end) is the prefixLen elements ending at idx.
		end := idx + 1
		start := end - int(prefixLen)
		assert.Equalf(t, pattern[:prefixLen], pattern[start:end],
			"for table[%d]=%d", idx, prefixLen)
	}
}

func FuzzBuildKMPTable(f *testing.F) {
	f.Add([]byte("ababaa"))
	f.Fuzz(func(t *testing.T, substr []byte) {
		table, err := buildKMPTable[int64, byte](SliceSequence[int64, byte](substr))
		require.NoError(t, err)
		require.Equal(t, len(substr), len(table), "length")
		for j, val := range table {
			matchLen := j + 1
			assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen],
				"for table[%d]=%d", j, val)
		}
	})
}

// NaiveIndexAll returns, in increasing order, every offset i in str for which
// str[i:] begins with substr. Overlapping occurrences are all reported. The
// result is nil when there is no such offset.
//
// It checks each offset independently with bytes.HasPrefix, and serves as the
// reference that FuzzIndexAll compares IndexAll against.
func NaiveIndexAll(str, substr []byte) []int64 {
	var offsets []int64
	for pos := 0; pos < len(str); pos++ {
		if !bytes.HasPrefix(str[pos:], substr) {
			continue
		}
		offsets = append(offsets, int64(pos))
	}
	return offsets
}

// FuzzIndexAll cross-checks IndexAll against NaiveIndexAll on arbitrary
// inputs. The haystack is read through a ByteReaderSequence wrapping a
// bytes.Reader, and the needle is passed as a SliceSequence. Inputs with an
// empty needle are skipped.
func FuzzIndexAll(f *testing.F) {
	// Seed inputs, so that a plain `go test` run (without -fuzz) has cases
	// to feed the fuzz body: overlapping matches, a match at the end of the
	// input, and a single-byte needle.
	f.Add([]byte("abababa"), []byte("aba"))
	f.Add([]byte("foobarbaz"), []byte("baz"))
	f.Add([]byte("aaaa"), []byte("a"))
	f.Fuzz(func(t *testing.T, str, substr []byte) {
		if len(substr) == 0 {
			t.Skip()
		}
		// Logged so that a failing input is visible in the test output.
		t.Logf("str   =%q", str)
		t.Logf("substr=%q", substr)
		exp := NaiveIndexAll(str, substr)
		act, err := IndexAll[int64, byte](
			&ByteReaderSequence[int64]{R: bytes.NewReader(str)},
			SliceSequence[int64, byte](substr))
		assert.NoError(t, err)
		assert.Equal(t, exp, act)
	})
}

// RESeq is a string-backed sequence of bytes in which the character '.' is
// not returned as a byte but reported as ErrWildcard. TestKMPWildcard uses it
// as the pattern argument to IndexAll.
type RESeq string

// Get returns the byte at index i. It returns io.EOF when i is outside the
// string, and ErrWildcard when the byte at i is '.'.
func (re RESeq) Get(i int64) (byte, error) {
	switch {
	case i < 0, i >= int64(len(re)):
		return 0, io.EOF
	case re[int(i)] == '.':
		return 0, ErrWildcard
	default:
		return re[int(i)], nil
	}
}

// TestKMPWildcard runs IndexAll with the pattern wrapped in RESeq, which
// reports every '.' in the pattern as ErrWildcard. The expected offsets in
// the table below treat those positions as matching any byte of the input.
// Each case runs as its own parallel subtest.
func TestKMPWildcard(t *testing.T) {
	t.Parallel()
	type testcase struct {
		InStr      string  // input to search
		InSubstr   string  // pattern; '.' marks a wildcard position
		ExpMatches []int64 // expected result of IndexAll
	}
	testcases := map[string]testcase{
		"trivial-bar": {
			InStr:      "foo_bar",
			InSubstr:   "foo.ba.",
			ExpMatches: []int64{0},
		},
		"trivial-baz": {
			InStr:      "foo-baz",
			InSubstr:   "foo.ba.",
			ExpMatches: []int64{0},
		},
		"suffix": {
			InStr:      "foobarbaz",
			InSubstr:   "...baz",
			ExpMatches: []int64{3},
		},
		"overlap": {
			InStr:      "foobarbar",
			InSubstr:   "...bar",
			ExpMatches: []int64{0, 3},
		},
	}
	for tcName, tc := range testcases {
		// Per-iteration copy: the parallel subtest closure runs after the
		// loop has moved on (required before Go 1.22).
		tc := tc
		t.Run(tcName, func(t *testing.T) {
			t.Parallel()
			matches, err := IndexAll[int64, byte](
				StringSequence[int64](tc.InStr),
				RESeq(tc.InSubstr))
			assert.NoError(t, err)
			assert.Equal(t, tc.ExpMatches, matches)
		})
	}
}