summaryrefslogtreecommitdiff
path: root/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go
blob: 29dd26ebe4ae550b924656a84a1fe28d515d4670 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package rebuildmappings

import (
	"errors"

	"git.lukeshu.com/btrfs-progs-ng/lib/diskio"
)

type kmpPattern[K ~int64 | ~int, V comparable] interface {
	PatLen() K
	// Get the value at 'pos' in the sequence.  Positions start at
	// 0 and increment naturally.  It is invalid to call Get(pos)
	// with a pos that is >= Len().  If there is a gap/wildcard at
	// pos, ok is false.
	PatGet(pos K) (v V, ok bool)
}

func kmpSelfEq[K ~int64 | ~int, V comparable](s kmpPattern[K, V], aI K, bI K) bool {
	aV, aOK := s.PatGet(aI)
	bV, bOK := s.PatGet(bI)
	if !aOK || !bOK {
		return true
	}
	return aV == bV
}

// buildKMPTable takes the string 'substr', and returns a table such
// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and
// 'substr[:val] == substr[matchLen-val:matchLen]'.
func buildKMPTable[K ~int64 | ~int, V comparable](substr kmpPattern[K, V]) []K {
	substrLen := substr.PatLen()

	table := make([]K, substrLen)
	for j := K(0); j < substrLen; j++ {
		if j == 0 {
			// First entry must always be 0 (in order to
			// satisfy 'val < matchLen').
			continue
		}
		val := table[j-1]
		// not a match; go back
		for val > 0 && !kmpSelfEq(substr, j, val) {
			val = table[val-1]
		}
		// is a match; go forward
		if kmpSelfEq(substr, val, j) {
			val++
		}
		table[j] = val
	}
	return table
}

func kmpEq[K ~int64 | ~int, V comparable](aV V, bS kmpPattern[K, V], bI K) bool {
	bV, ok := bS.PatGet(bI)
	if !ok {
		return true
	}
	return aV == bV
}

// indexAll returns the starting-position of all possibly-overlapping
// occurrences of 'substr' in the 'str' sequence.
//
// Will hop around in 'substr', but will only get the natural sequence
// [0...) in order from 'str'.
//
// Will panic if the length of 'substr' is 0.
//
// The 'substr' may include wildcard characters by returning
// (_, false) for a position.
//
// Uses the Knuth-Morris-Pratt algorithm.
func indexAll[K ~int64 | ~int, V comparable](str diskio.Sequence[K, V], substr kmpPattern[K, V]) []K {
	table := buildKMPTable(substr)
	substrLen := K(len(table))
	if substrLen == 0 {
		panic(errors.New("rebuildmappings.IndexAll: empty substring"))
	}

	var matches []K
	var curMatchBeg K
	var curMatchLen K

	for pos, strLen := K(0), str.SeqLen(); pos < strLen; pos++ {
		chr := str.SeqGet(pos)

		// Consider 'chr'
		for curMatchLen > 0 && !kmpEq(chr, substr, curMatchLen) { // shorten the match
			overlap := table[curMatchLen-1]
			curMatchBeg += curMatchLen - overlap
			curMatchLen = overlap
		}
		if kmpEq(chr, substr, curMatchLen) { // lengthen the match
			if curMatchLen == 0 {
				curMatchBeg = pos
			}
			curMatchLen++
			if curMatchLen == substrLen {
				matches = append(matches, curMatchBeg)
				overlap := table[curMatchLen-1]
				curMatchBeg += curMatchLen - overlap
				curMatchLen = overlap
			}
		}
	}
	return matches
}