diff options
Diffstat (limited to 'lib/diskio/kmp.go')
-rw-r--r-- | lib/diskio/kmp.go | 79 |
1 files changed, 52 insertions, 27 deletions
diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go index 69a3a51..da19e81 100644 --- a/lib/diskio/kmp.go +++ b/lib/diskio/kmp.go @@ -9,12 +9,31 @@ import ( "io" ) +func mustGet[K ~int64, V any](seq Sequence[K, V], i K) V { + val, err := seq.Get(i) + if err != nil { + panic(err) + } + return val +} + // buildKMPTable takes the string 'substr', and returns a table such // that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and // 'substr[:val] == substr[matchLen-val:matchLen]'. -func buildKMPTable(substr []byte) []int { - table := make([]int, len(substr)) - for j := range table { +func buildKMPTable[K ~int64, V comparable](substr Sequence[K, V]) ([]K, error) { + var substrLen K + for { + if _, err := substr.Get(substrLen); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + substrLen++ + } + + table := make([]K, substrLen) + for j := K(0); j < substrLen; j++ { if j == 0 { // First entry must always be 0 (in order to // satisfy 'val < matchLen'). @@ -22,62 +41,68 @@ func buildKMPTable(substr []byte) []int { } val := table[j-1] // not a match; go back - for val > 0 && substr[j] != substr[val] { + for val > 0 && mustGet(substr, j) != mustGet(substr, val) { val = table[val-1] } // is a match; go forward - if substr[val] == substr[j] { + if mustGet(substr, val) == mustGet(substr, j) { val++ } table[j] = val } - return table + return table, nil } -// FindAll returns the starting-position of all possibly-overlapping -// occurances of 'substr' in the 'r' stream. +// IndexAll returns the starting-position of all possibly-overlapping +// occurances of 'substr' in the 'str' sequence. // -// Will panic if len(substr)==0. +// Will hop around in 'substr', but will only get the natural sequence +// [0...) in order from 'str'. When hopping around in 'substr' it +// assumes that once it has gotten a given index without error, it can +// continue to do so without error; errors appearing later will cause +// panics. +// +// Will panic if the length of 'substr' is 0. // // Uses the Knuth-Morris-Pratt algorithm. -func FindAll[A ~int64](r io.ByteReader, substr []byte) ([]A, error) { - if len(substr) == 0 { - panic(errors.New("diskio.FindAll: empty substring")) +func IndexAll[K ~int64, V comparable](str, substr Sequence[K, V]) ([]K, error) { + table, err := buildKMPTable(substr) + if err != nil { + return nil, err + } + substrLen := K(len(table)) + if substrLen == 0 { + panic(errors.New("diskio.IndexAll: empty substring")) } - table := buildKMPTable(substr) - var matches []A - var curMatchBeg A - var curMatchLen int + var matches []K + var curMatchBeg K + var curMatchLen K - pos := A(-1) // if 'r' were a slice; define 'pos' such that 'chr=r[pos]' - for { - // I/O - var chr byte - chr, err := r.ReadByte() + for pos := K(0); ; pos++ { + chr, err := str.Get(pos) if err != nil { if errors.Is(err, io.EOF) { err = nil } return matches, err } - pos++ // Consider 'chr' - for curMatchLen > 0 && chr != substr[curMatchLen] { // shorten the match + for curMatchLen > 0 && chr != mustGet(substr, curMatchLen) { // shorten the match overlap := table[curMatchLen-1] - curMatchBeg += A(curMatchLen - overlap) + curMatchBeg += curMatchLen - overlap curMatchLen = overlap } - if chr == substr[curMatchLen] { // lengthen the match + if chr == mustGet(substr, curMatchLen) { // lengthen the match if curMatchLen == 0 { curMatchBeg = pos } curMatchLen++ - if curMatchLen == len(substr) { + if curMatchLen == substrLen { matches = append(matches, curMatchBeg) overlap := table[curMatchLen-1] - curMatchBeg += A(curMatchLen - overlap) + curMatchBeg += curMatchLen - overlap curMatchLen = overlap } } |