diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-03 19:50:35 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-02-04 20:53:23 -0700 |
commit | ef60daef395b20b67042c011f5b2a1131049e275 (patch) | |
tree | c70aa1661272e10883bbc57373cf00ab980ef336 /lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go | |
parent | 77f3c0d7cd21274d00984b72dfce05394d11bdd0 (diff) |
rebuildmappings: Optimize the KMP search
Diffstat (limited to 'lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go')
-rw-r--r-- | lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go | 106 |
1 files changed, 35 insertions, 71 deletions
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go index eeaab0c..20772ba 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go @@ -6,48 +6,24 @@ package rebuildmappings import ( "errors" - "io" "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) -var ErrWildcard = errors.New("wildcard") - -func kmpEq2[K ~int64, V comparable](aS diskio.Sequence[K, V], aI K, bS diskio.Sequence[K, V], bI K) bool { - aV, aErr := aS.Get(aI) - bV, bErr := bS.Get(bI) - if aErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if aErr == ErrWildcard || errors.Is(aErr, ErrWildcard) { - aV = bV - aErr = nil - } else { - panic(aErr) - } - } - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - bV = aV - bErr = nil - } else { - panic(bErr) - } - } - if aErr != nil || bErr != nil { - return false - } - return aV == bV +type kmpPattern[K ~int64 | ~int, V comparable] interface { + PatLen() K + // Get the value at 'pos' in the sequence. Positions start at + // 0 and increment naturally. It is invalid to call Get(pos) + // with a pos that is >= Len(). If there is a gap/wildcard at + // pos, ok is false. + PatGet(pos K) (v V, ok bool) } -func kmpEq1[K ~int64, V comparable](aV V, bS diskio.Sequence[K, V], bI K) bool { - bV, bErr := bS.Get(bI) - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - return true - } - panic(bErr) +func kmpSelfEq[K ~int64 | ~int, V comparable](s kmpPattern[K, V], aI K, bI K) bool { + aV, aOK := s.PatGet(aI) + bV, bOK := s.PatGet(bI) + if !aOK || !bOK { + return true } return aV == bV } @@ -55,18 +31,8 @@ func kmpEq1[K ~int64, V comparable](aV V, bS diskio.Sequence[K, V], bI K) bool { // buildKMPTable takes the string 'substr', and returns a table such // that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and // 'substr[:val] == substr[matchLen-val:matchLen]'. -func buildKMPTable[K ~int64, V comparable](substr diskio.Sequence[K, V]) ([]K, error) { - var substrLen K - for { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if _, err := substr.Get(substrLen); err != nil && !(err == ErrWildcard || errors.Is(err, ErrWildcard)) { - if errors.Is(err, io.EOF) { - break - } - return nil, err - } - substrLen++ - } +func buildKMPTable[K ~int64 | ~int, V comparable](substr kmpPattern[K, V]) []K { + substrLen := substr.PatLen() table := make([]K, substrLen) for j := K(0); j < substrLen; j++ { @@ -77,26 +43,31 @@ func buildKMPTable[K ~int64, V comparable](substr diskio.Sequence[K, V]) ([]K, e } val := table[j-1] // not a match; go back - for val > 0 && !kmpEq2(substr, j, substr, val) { + for val > 0 && !kmpSelfEq(substr, j, val) { val = table[val-1] } // is a match; go forward - if kmpEq2(substr, val, substr, j) { + if kmpSelfEq(substr, val, j) { val++ } table[j] = val } - return table, nil + return table } -// IndexAll returns the starting-position of all possibly-overlapping +func kmpEq[K ~int64 | ~int, V comparable](aV V, bS kmpPattern[K, V], bI K) bool { + bV, ok := bS.PatGet(bI) + if !ok { + return true + } + return aV == bV +} + +// indexAll returns the starting-position of all possibly-overlapping // occurrences of 'substr' in the 'str' sequence. // // Will hop around in 'substr', but will only get the natural sequence -// [0...) in order from 'str'. When hopping around in 'substr' it -// assumes that once it has gotten a given index without error, it can -// continue to do so without error; errors appearing later will cause -// panics. +// [0...) in order from 'str'. // // Will panic if the length of 'substr' is 0. // @@ -104,11 +75,8 @@ func buildKMPTable[K ~int64, V comparable](substr diskio.Sequence[K, V]) ([]K, e // ErrWildcard for a position. // // Uses the Knuth-Morris-Pratt algorithm. -func IndexAll[K ~int64, V comparable](str, substr diskio.Sequence[K, V]) ([]K, error) { - table, err := buildKMPTable(substr) - if err != nil { - return nil, err - } +func indexAll[K ~int64 | ~int, V comparable](str diskio.Sequence[K, V], substr kmpPattern[K, V]) []K { + table := buildKMPTable(substr) substrLen := K(len(table)) if substrLen == 0 { panic(errors.New("rebuildmappings.IndexAll: empty substring")) @@ -118,22 +86,17 @@ func IndexAll[K ~int64, V comparable](str, substr diskio.Sequence[K, V]) ([]K, e var curMatchBeg K var curMatchLen K - for pos := K(0); ; pos++ { - chr, err := str.Get(pos) - if err != nil { - if errors.Is(err, io.EOF) { - err = nil - } - return matches, err - } + strLen := str.SeqLen() + for pos := K(0); pos < strLen; pos++ { + chr := str.SeqGet(pos) // Consider 'chr' - for curMatchLen > 0 && !kmpEq1(chr, substr, curMatchLen) { // shorten the match + for curMatchLen > 0 && !kmpEq(chr, substr, curMatchLen) { // shorten the match overlap := table[curMatchLen-1] curMatchBeg += curMatchLen - overlap curMatchLen = overlap } - if kmpEq1(chr, substr, curMatchLen) { // lengthen the match + if kmpEq(chr, substr, curMatchLen) { // lengthen the match if curMatchLen == 0 { curMatchBeg = pos } @@ -146,4 +109,5 @@ func IndexAll[K ~int64, V comparable](str, substr diskio.Sequence[K, V]) ([]K, e } } } + return matches } |