summaryrefslogtreecommitdiff
path: root/lib/diskio/kmp.go
diff options
context:
space:
mode:
Diffstat (limited to 'lib/diskio/kmp.go')
-rw-r--r--lib/diskio/kmp.go79
1 files changed, 52 insertions, 27 deletions
diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go
index 69a3a51..da19e81 100644
--- a/lib/diskio/kmp.go
+++ b/lib/diskio/kmp.go
@@ -9,12 +9,31 @@ import (
"io"
)
+func mustGet[K ~int64, V any](seq Sequence[K, V], i K) V {
+ val, err := seq.Get(i)
+ if err != nil {
+ panic(err)
+ }
+ return val
+}
+
// buildKMPTable takes the string 'substr', and returns a table such
// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and
// 'substr[:val] == substr[matchLen-val:matchLen]'.
-func buildKMPTable(substr []byte) []int {
- table := make([]int, len(substr))
- for j := range table {
+func buildKMPTable[K ~int64, V comparable](substr Sequence[K, V]) ([]K, error) {
+ var substrLen K
+ for {
+ if _, err := substr.Get(substrLen); err != nil {
+ if errors.Is(err, io.EOF) {
+ break
+ }
+ return nil, err
+ }
+ substrLen++
+ }
+
+ table := make([]K, substrLen)
+ for j := K(0); j < substrLen; j++ {
if j == 0 {
// First entry must always be 0 (in order to
// satisfy 'val < matchLen').
@@ -22,62 +41,68 @@ func buildKMPTable(substr []byte) []int {
}
val := table[j-1]
// not a match; go back
- for val > 0 && substr[j] != substr[val] {
+ for val > 0 && mustGet(substr, j) != mustGet(substr, val) {
val = table[val-1]
}
// is a match; go forward
- if substr[val] == substr[j] {
+ if mustGet(substr, val) == mustGet(substr, j) {
val++
}
table[j] = val
}
- return table
+ return table, nil
}
-// FindAll returns the starting-position of all possibly-overlapping
-// occurances of 'substr' in the 'r' stream.
+// IndexAll returns the starting-position of all possibly-overlapping
+// occurances of 'substr' in the 'str' sequence.
//
-// Will panic if len(substr)==0.
+// Will hop around in 'substr', but will only get the natural sequence
+// [0...) in order from 'str'. When hopping around in 'substr' it
+// assumes that once it has gotten a given index without error, it can
+// continue to do so without error; errors appearing later will cause
+// panics.
+//
+// Will panic if the length of 'substr' is 0.
//
// Uses the Knuth-Morris-Pratt algorithm.
-func FindAll[A ~int64](r io.ByteReader, substr []byte) ([]A, error) {
- if len(substr) == 0 {
- panic(errors.New("diskio.FindAll: empty substring"))
+func IndexAll[K ~int64, V comparable](str, substr Sequence[K, V]) ([]K, error) {
+ table, err := buildKMPTable(substr)
+ if err != nil {
+ return nil, err
+ }
+ substrLen := K(len(table))
+ if substrLen == 0 {
+ panic(errors.New("diskio.IndexAll: empty substring"))
}
- table := buildKMPTable(substr)
- var matches []A
- var curMatchBeg A
- var curMatchLen int
+ var matches []K
+ var curMatchBeg K
+ var curMatchLen K
- pos := A(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]'
- for {
- // I/O
- var chr byte
- chr, err := r.ReadByte()
+ for pos := K(0); ; pos++ {
+ chr, err := str.Get(pos)
if err != nil {
if errors.Is(err, io.EOF) {
err = nil
}
return matches, err
}
- pos++
// Consider 'chr'
- for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match
+ for curMatchLen > 0 && chr != mustGet(substr, curMatchLen) { // shorten the match
overlap := table[curMatchLen-1]
- curMatchBeg += A(curMatchLen - overlap)
+ curMatchBeg += curMatchLen - overlap
curMatchLen = overlap
}
- if chr == substr[curMatchLen] { // lengthen the match
+ if chr == mustGet(substr, curMatchLen) { // lengthen the match
if curMatchLen == 0 {
curMatchBeg = pos
}
curMatchLen++
- if curMatchLen == len(substr) {
+ if curMatchLen == substrLen {
matches = append(matches, curMatchBeg)
overlap := table[curMatchLen-1]
- curMatchBeg += A(curMatchLen - overlap)
+ curMatchBeg += curMatchLen - overlap
curMatchLen = overlap
}
}