diff options
Diffstat (limited to 'lib')
17 files changed, 400 insertions, 439 deletions
diff --git a/lib/btrfs/btrfssum/csum.go b/lib/btrfs/btrfssum/csum.go index 6df9efd..f436e2c 100644 --- a/lib/btrfs/btrfssum/csum.go +++ b/lib/btrfs/btrfssum/csum.go @@ -14,6 +14,8 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/fmtutil" ) +const BlockSize = 4 * 1024 + type CSum [0x20]byte var ( diff --git a/lib/btrfs/btrfssum/shortsum.go b/lib/btrfs/btrfssum/shortsum.go index e3441ae..754a79d 100644 --- a/lib/btrfs/btrfssum/shortsum.go +++ b/lib/btrfs/btrfssum/shortsum.go @@ -5,22 +5,14 @@ package btrfssum import ( - "context" "fmt" "io" "math" "strings" "git.lukeshu.com/go/lowmemjson" - - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) -const BlockSize = 4 * 1024 - -// ShortSum ////////////////////////////////////////////////////////// - type ShortSum string var ( @@ -105,196 +97,3 @@ func (sum *ShortSum) DecodeJSON(r io.RuneScanner) error { *sum = ShortSum(out.String()) return nil } - -// SumRun //////////////////////////////////////////////////////////// - -type SumRun[Addr btrfsvol.IntAddr[Addr]] struct { - // How big a ShortSum is in this Run. - ChecksumSize int `json:",omitempty"` - // Base address where this run starts. - Addr Addr `json:",omitempty"` - // All of the ShortSums in this run, concatenated together. - Sums ShortSum -} - -func (run SumRun[Addr]) NumSums() int { - return len(run.Sums) / run.ChecksumSize -} - -func (run SumRun[Addr]) Size() btrfsvol.AddrDelta { - return btrfsvol.AddrDelta(run.NumSums()) * BlockSize -} - -// Get implements diskio.Sequence[int, ShortSum] -func (run SumRun[Addr]) Get(sumIdx int64) (ShortSum, error) { - if sumIdx < 0 || int(sumIdx) >= run.NumSums() { - return "", io.EOF - } - off := int(sumIdx) * run.ChecksumSize - return run.Sums[off : off+run.ChecksumSize], nil -} - -func (run SumRun[Addr]) SumForAddr(addr Addr) (ShortSum, bool) { - if addr < run.Addr || addr >= run.Addr.Add(run.Size()) { - return "", false - } - off := int((addr-run.Addr)/BlockSize) * run.ChecksumSize - return run.Sums[off : off+run.ChecksumSize], true -} - -func (run SumRun[Addr]) Walk(ctx context.Context, fn func(Addr, ShortSum) error) error { - for addr, off := run.Addr, 0; off < len(run.Sums); addr, off = addr+BlockSize, off+run.ChecksumSize { - if err := ctx.Err(); err != nil { - return err - } - if err := fn(addr, run.Sums[off:off+run.ChecksumSize]); err != nil { - return err - } - } - return nil -} - -// SumRunWithGaps //////////////////////////////////////////////////// - -type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct { - // Store the start address and size, in order to facilitate - // leading and trailing gaps. - Addr Addr - Size btrfsvol.AddrDelta - - Runs []SumRun[Addr] -} - -var ( - _ lowmemjson.Encodable = SumRunWithGaps[btrfsvol.LogicalAddr]{} - _ lowmemjson.Decodable = (*SumRunWithGaps[btrfsvol.LogicalAddr])(nil) -) - -func (sg SumRunWithGaps[Addr]) NumSums() int { - return int(sg.Size / BlockSize) -} - -func (sg SumRunWithGaps[Addr]) PctFull() float64 { - total := sg.NumSums() - var full int - for _, run := range sg.Runs { - full += run.NumSums() - } - return float64(full) / float64(total) -} - -func (sg SumRunWithGaps[Addr]) RunForAddr(addr Addr) (SumRun[Addr], Addr, bool) { - for _, run := range sg.Runs { - if run.Addr > addr { - return SumRun[Addr]{}, run.Addr, false - } - if run.Addr.Add(run.Size()) <= addr { - continue - } - return run, 0, true - } - return SumRun[Addr]{}, math.MaxInt64, false -} - -func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (ShortSum, error) { - if addr < sg.Addr || addr >= sg.Addr.Add(sg.Size) { - return "", io.EOF - } - for _, run := range sg.Runs { - if run.Addr > addr { - return "", diskio.ErrWildcard - } - if run.Addr.Add(run.Size()) <= addr { - continue - } - off := int((addr-run.Addr)/BlockSize) * run.ChecksumSize - return run.Sums[off : off+run.ChecksumSize], nil - } - return "", diskio.ErrWildcard -} - -func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, ShortSum) error) error { - for _, run := range sg.Runs { - if err := run.Walk(ctx, fn); err != nil { - return err - } - } - return nil -} - -// Get implements diskio.Sequence[int, ShortSum] -func (sg SumRunWithGaps[Addr]) Get(sumIdx int64) (ShortSum, error) { - addr := sg.Addr.Add(btrfsvol.AddrDelta(sumIdx) * BlockSize) - return sg.SumForAddr(addr) -} - -func (sg SumRunWithGaps[Addr]) EncodeJSON(w io.Writer) error { - if _, err := fmt.Fprintf(w, `{"Addr":%d,"Size":%d,"Runs":[`, sg.Addr, sg.Size); err != nil { - return err - } - cur := sg.Addr - for i, run := range sg.Runs { - if i > 0 { - if _, err := w.Write([]byte{','}); err != nil { - return err - } - } - switch { - case run.Addr < cur: - return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, run.Addr, cur) - case run.Addr > cur: - if _, err := fmt.Fprintf(w, `{"Gap":%d},`, run.Addr.Sub(cur)); err != nil { - return err - } - fallthrough - default: - if err := lowmemjson.NewEncoder(w).Encode(run); err != nil { - return err - } - cur = run.Addr.Add(run.Size()) - } - } - end := sg.Addr.Add(sg.Size) - switch { - case end < cur: - return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, end, cur) - case end > cur: - if _, err := fmt.Fprintf(w, `,{"Gap":%d}`, end.Sub(cur)); err != nil { - return err - } - } - if _, err := w.Write([]byte("]}")); err != nil { - return err - } - return nil -} - -func (sg *SumRunWithGaps[Addr]) DecodeJSON(r io.RuneScanner) error { - *sg = SumRunWithGaps[Addr]{} - var name string - return lowmemjson.DecodeObject(r, - func(r io.RuneScanner) error { - return lowmemjson.NewDecoder(r).Decode(&name) - }, - func(r io.RuneScanner) error { - switch name { - case "Addr": - return lowmemjson.NewDecoder(r).Decode(&sg.Addr) - case "Size": - return lowmemjson.NewDecoder(r).Decode(&sg.Size) - case "Runs": - return lowmemjson.DecodeArray(r, func(r io.RuneScanner) error { - var run SumRun[Addr] - if err := lowmemjson.NewDecoder(r).Decode(&run); err != nil { - return err - } - if run.ChecksumSize > 0 { - sg.Runs = append(sg.Runs, run) - } - return nil - }) - default: - return fmt.Errorf("unknown key %q", name) - } - }) -} diff --git a/lib/btrfs/btrfssum/sumrun.go b/lib/btrfs/btrfssum/sumrun.go new file mode 100644 index 0000000..bc2db3f --- /dev/null +++ b/lib/btrfs/btrfssum/sumrun.go @@ -0,0 +1,58 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrfssum + +import ( + "context" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +type SumRun[Addr btrfsvol.IntAddr[Addr]] struct { + // How big a ShortSum is in this Run. + ChecksumSize int `json:",omitempty"` + // Base address where this run starts. + Addr Addr `json:",omitempty"` + // All of the ShortSums in this run, concatenated together. + Sums ShortSum +} + +var _ diskio.Sequence[int, ShortSum] = SumRun[btrfsvol.LogicalAddr]{} + +// SeqLen implements diskio.Sequence[int, ShortSum]. +func (run SumRun[Addr]) SeqLen() int { + return len(run.Sums) / run.ChecksumSize +} + +func (run SumRun[Addr]) Size() btrfsvol.AddrDelta { + return btrfsvol.AddrDelta(run.SeqLen()) * BlockSize +} + +// SeqGet implements diskio.Sequence[int, ShortSum]. +func (run SumRun[Addr]) SeqGet(sumIdx int) ShortSum { + off := sumIdx * run.ChecksumSize + return run.Sums[off : off+run.ChecksumSize] +} + +func (run SumRun[Addr]) SumForAddr(addr Addr) (ShortSum, bool) { + if addr < run.Addr || addr >= run.Addr.Add(run.Size()) { + return "", false + } + off := int((addr-run.Addr)/BlockSize) * run.ChecksumSize + return run.Sums[off : off+run.ChecksumSize], true +} + +func (run SumRun[Addr]) Walk(ctx context.Context, fn func(Addr, ShortSum) error) error { + for addr, off := run.Addr, 0; off < len(run.Sums); addr, off = addr+BlockSize, off+run.ChecksumSize { + if err := ctx.Err(); err != nil { + return err + } + if err := fn(addr, run.Sums[off:off+run.ChecksumSize]); err != nil { + return err + } + } + return nil +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go index 9e6b864..6b75d84 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go @@ -41,7 +41,7 @@ func fuzzyMatchBlockGroupSums(ctx context.Context, fs *btrfs.FS, blockgroups map[btrfsvol.LogicalAddr]BlockGroup, physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], - logicalSums btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr], + logicalSums SumRunWithGaps[btrfsvol.LogicalAddr], ) error { _ctx := ctx @@ -69,7 +69,7 @@ func fuzzyMatchBlockGroupSums(ctx context.Context, blockgroup := blockgroups[bgLAddr] bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size) - d := bgRun.NumSums() + d := bgRun.PatLen() matches := make(map[btrfsvol.QualifiedPhysicalAddr]int) if err := bgRun.Walk(ctx, func(laddr btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { // O(n*… off := laddr.Sub(bgRun.Addr) diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go new file mode 100644 index 0000000..20772ba --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp.go @@ -0,0 +1,113 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "errors" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +type kmpPattern[K ~int64 | ~int, V comparable] interface { + PatLen() K + // Get the value at 'pos' in the sequence. Positions start at + // 0 and increment naturally. It is invalid to call Get(pos) + // with a pos that is >= Len(). If there is a gap/wildcard at + // pos, ok is false. + PatGet(pos K) (v V, ok bool) +} + +func kmpSelfEq[K ~int64 | ~int, V comparable](s kmpPattern[K, V], aI K, bI K) bool { + aV, aOK := s.PatGet(aI) + bV, bOK := s.PatGet(bI) + if !aOK || !bOK { + return true + } + return aV == bV +} + +// buildKMPTable takes the string 'substr', and returns a table such +// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and +// 'substr[:val] == substr[matchLen-val:matchLen]'. +func buildKMPTable[K ~int64 | ~int, V comparable](substr kmpPattern[K, V]) []K { + substrLen := substr.PatLen() + + table := make([]K, substrLen) + for j := K(0); j < substrLen; j++ { + if j == 0 { + // First entry must always be 0 (in order to + // satisfy 'val < matchLen'). + continue + } + val := table[j-1] + // not a match; go back + for val > 0 && !kmpSelfEq(substr, j, val) { + val = table[val-1] + } + // is a match; go forward + if kmpSelfEq(substr, val, j) { + val++ + } + table[j] = val + } + return table +} + +func kmpEq[K ~int64 | ~int, V comparable](aV V, bS kmpPattern[K, V], bI K) bool { + bV, ok := bS.PatGet(bI) + if !ok { + return true + } + return aV == bV +} + +// indexAll returns the starting-position of all possibly-overlapping +// occurrences of 'substr' in the 'str' sequence. +// +// Will hop around in 'substr', but will only get the natural sequence +// [0...) in order from 'str'. +// +// Will panic if the length of 'substr' is 0. +// +// The 'substr' may include wildcard characters by returning +// ErrWildcard for a position. +// +// Uses the Knuth-Morris-Pratt algorithm. +func indexAll[K ~int64 | ~int, V comparable](str diskio.Sequence[K, V], substr kmpPattern[K, V]) []K { + table := buildKMPTable(substr) + substrLen := K(len(table)) + if substrLen == 0 { + panic(errors.New("rebuildmappings.IndexAll: empty substring")) + } + + var matches []K + var curMatchBeg K + var curMatchLen K + + strLen := str.SeqLen() + for pos := K(0); pos < strLen; pos++ { + chr := str.SeqGet(pos) + + // Consider 'chr' + for curMatchLen > 0 && !kmpEq(chr, substr, curMatchLen) { // shorten the match + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + if kmpEq(chr, substr, curMatchLen) { // lengthen the match + if curMatchLen == 0 { + curMatchBeg = pos + } + curMatchLen++ + if curMatchLen == substrLen { + matches = append(matches, curMatchBeg) + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + } + } + return matches +} diff --git a/lib/diskio/kmp_test.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go index 4d4b3be..acec9b8 100644 --- a/lib/diskio/kmp_test.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/kmp_test.go @@ -2,22 +2,40 @@ // // SPDX-License-Identifier: GPL-2.0-or-later -package diskio +package rebuildmappings import ( "bytes" - "io" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) +type bytePattern[K ~int64 | ~int] []byte + +var _ kmpPattern[int, byte] = bytePattern[int]{} + +// PatLen implements kmpPattern. +func (s bytePattern[K]) PatLen() K { + return K(len(s)) +} + +// PatGet implements kmpPattern. +func (s bytePattern[K]) PatGet(i K) (byte, bool) { + chr := s[int(i)] + if chr == '.' { + return 0, false + } + return chr, true +} + func TestBuildKMPTable(t *testing.T) { t.Parallel() - substr := SliceSequence[int64, byte]([]byte("ababaa")) - table, err := buildKMPTable[int64, byte](substr) - require.NoError(t, err) + substr := bytePattern[int64]([]byte("ababaa")) + table := buildKMPTable[int64, byte](substr) require.Equal(t, []int64{0, 0, 1, 2, 3, 1}, table) @@ -31,8 +49,7 @@ func TestBuildKMPTable(t *testing.T) { func FuzzBuildKMPTable(f *testing.F) { f.Add([]byte("ababaa")) f.Fuzz(func(t *testing.T, substr []byte) { - table, err := buildKMPTable[int64, byte](SliceSequence[int64, byte](substr)) - require.NoError(t, err) + table := buildKMPTable[int64, byte](bytePattern[int64](substr)) require.Equal(t, len(substr), len(table), "length") for j, val := range table { matchLen := j + 1 @@ -60,27 +77,13 @@ func FuzzIndexAll(f *testing.F) { t.Logf("str =%q", str) t.Logf("substr=%q", substr) exp := NaiveIndexAll(str, substr) - act, err := IndexAll[int64, byte]( - &ByteReaderSequence[int64]{R: bytes.NewReader(str)}, - SliceSequence[int64, byte](substr)) - assert.NoError(t, err) + act := indexAll[int64, byte]( + diskio.SliceSequence[int64, byte](str), + bytePattern[int64](substr)) assert.Equal(t, exp, act) }) } -type RESeq string - -func (re RESeq) Get(i int64) (byte, error) { - if i < 0 || i >= int64(len(re)) { - return 0, io.EOF - } - chr := re[int(i)] - if chr == '.' { - return 0, ErrWildcard - } - return chr, nil -} - func TestKMPWildcard(t *testing.T) { t.Parallel() type testcase struct { @@ -114,10 +117,9 @@ func TestKMPWildcard(t *testing.T) { tc := tc t.Run(tcName, func(t *testing.T) { t.Parallel() - matches, err := IndexAll[int64, byte]( - StringSequence[int64](tc.InStr), - RESeq(tc.InSubstr)) - assert.NoError(t, err) + matches := indexAll[int64, byte]( + diskio.StringSequence[int64](tc.InStr), + bytePattern[int64](tc.InSubstr)) assert.Equal(t, tc.ExpMatches, matches) }) } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go index 537a970..69d14c7 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> // // SPDX-License-Identifier: GPL-2.0-or-later @@ -20,7 +20,7 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/slices" ) -func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] { +func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) SumRunWithGaps[btrfsvol.LogicalAddr] { var records []btrfsinspect.SysExtentCSum for _, devResults := range scanResults { records = append(records, devResults.FoundExtentCSums...) @@ -38,7 +38,7 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice } }) if len(records) == 0 { - return btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]{} + return SumRunWithGaps[btrfsvol.LogicalAddr]{} } sumSize := records[0].Sums.ChecksumSize @@ -149,7 +149,7 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice } // Now flatten that RBTree in to a SumRunWithGaps. - var flattened btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] + var flattened SumRunWithGaps[btrfsvol.LogicalAddr] var curAddr btrfsvol.LogicalAddr var curSums strings.Builder _ = addrspace.Walk(func(node *containers.RBNode[btrfsinspect.SysExtentCSum]) error { @@ -183,7 +183,7 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice return flattened } -func ListUnmappedLogicalRegions(fs *btrfs.FS, logicalSums btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]) []btrfssum.SumRun[btrfsvol.LogicalAddr] { +func ListUnmappedLogicalRegions(fs *btrfs.FS, logicalSums SumRunWithGaps[btrfsvol.LogicalAddr]) []btrfssum.SumRun[btrfsvol.LogicalAddr] { // There are a lot of ways this algorithm could be made // faster. var ret []btrfssum.SumRun[btrfsvol.LogicalAddr] @@ -226,8 +226,8 @@ func ListUnmappedLogicalRegions(fs *btrfs.FS, logicalSums btrfssum.SumRunWithGap return ret } -func SumsForLogicalRegion(sums btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr], beg btrfsvol.LogicalAddr, size btrfsvol.AddrDelta) btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] { - runs := btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]{ +func SumsForLogicalRegion(sums SumRunWithGaps[btrfsvol.LogicalAddr], beg btrfsvol.LogicalAddr, size btrfsvol.AddrDelta) SumRunWithGaps[btrfsvol.LogicalAddr] { + runs := SumRunWithGaps[btrfsvol.LogicalAddr]{ Addr: beg, Size: size, } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go index 02c657f..a3e724e 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go @@ -14,7 +14,6 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" "git.lukeshu.com/btrfs-progs-ng/lib/maps" ) @@ -22,7 +21,7 @@ func matchBlockGroupSums(ctx context.Context, fs *btrfs.FS, blockgroups map[btrfsvol.LogicalAddr]BlockGroup, physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], - logicalSums btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr], + logicalSums SumRunWithGaps[btrfsvol.LogicalAddr], ) error { regions := ListUnmappedPhysicalRegions(fs) numBlockgroups := len(blockgroups) @@ -37,10 +36,7 @@ func matchBlockGroupSums(ctx context.Context, var matches []btrfsvol.QualifiedPhysicalAddr if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error { - rawMatches, err := diskio.IndexAll[int64, btrfssum.ShortSum](region, bgRun) - if err != nil { - return err - } + rawMatches := indexAll[int, btrfssum.ShortSum](region, bgRun) for _, match := range rawMatches { matches = append(matches, btrfsvol.QualifiedPhysicalAddr{ Dev: devID, diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go index 665bc96..cdf5e5a 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go @@ -158,10 +158,6 @@ func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect // The fuzzy-search is only fast because the exact-search is so good at getting `physicalBlocks` down. // Empirically: if I remove the exact-search step, then the fuzzy-match step is more than an order of magnitude // slower. - // - // The exact-search probably could be optimized to be substantially faster (by a constant factor; not affecting - // the big-O) by figuring out how to inline function calls and get reduce allocations, but IMO it's "fast - // enough" for now. ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "5/6") dlog.Infof(_ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs)) physicalSums := ExtractPhysicalSums(scanResults) diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go new file mode 100644 index 0000000..f79e2be --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/sumrunwithgaps.go @@ -0,0 +1,167 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "fmt" + "io" + "math" + + "git.lukeshu.com/go/lowmemjson" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" +) + +type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct { + // Store the start address and size, in order to facilitate + // leading and trailing gaps. + Addr Addr + Size btrfsvol.AddrDelta + + Runs []btrfssum.SumRun[Addr] +} + +var ( + _ lowmemjson.Encodable = SumRunWithGaps[btrfsvol.LogicalAddr]{} + _ lowmemjson.Decodable = (*SumRunWithGaps[btrfsvol.LogicalAddr])(nil) +) + +// PatLen implements kmpPattern[int, ShortSum]. +func (sg SumRunWithGaps[Addr]) PatLen() int { + return int(sg.Size / btrfssum.BlockSize) +} + +func (sg SumRunWithGaps[Addr]) PctFull() float64 { + total := sg.PatLen() + var full int + for _, run := range sg.Runs { + full += run.SeqLen() + } + return float64(full) / float64(total) +} + +func (sg SumRunWithGaps[Addr]) RunForAddr(addr Addr) (btrfssum.SumRun[Addr], Addr, bool) { + for _, run := range sg.Runs { + if run.Addr > addr { + return btrfssum.SumRun[Addr]{}, run.Addr, false + } + if run.Addr.Add(run.Size()) <= addr { + continue + } + return run, 0, true + } + return btrfssum.SumRun[Addr]{}, math.MaxInt64, false +} + +func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, bool) { + if addr < sg.Addr || addr >= sg.Addr.Add(sg.Size) { + return "", false + } + runIdx, ok := slices.Search(sg.Runs, func(run btrfssum.SumRun[Addr]) int { + switch { + case addr < run.Addr: + return -1 + case addr >= run.Addr.Add(run.Size()): + return 1 + default: + return 0 + } + }) + if !ok { + return "", false + } + run := sg.Runs[runIdx] + off := int((addr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize + return run.Sums[off : off+run.ChecksumSize], true +} + +func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, btrfssum.ShortSum) error) error { + for _, run := range sg.Runs { + if err := run.Walk(ctx, fn); err != nil { + return err + } + } + return nil +} + +// PatGet implements kmpPattern[int, ShortSum]. +func (sg SumRunWithGaps[Addr]) PatGet(sumIdx int) (btrfssum.ShortSum, bool) { + addr := sg.Addr.Add(btrfsvol.AddrDelta(sumIdx) * btrfssum.BlockSize) + return sg.SumForAddr(addr) +} + +func (sg SumRunWithGaps[Addr]) EncodeJSON(w io.Writer) error { + if _, err := fmt.Fprintf(w, `{"Addr":%d,"Size":%d,"Runs":[`, sg.Addr, sg.Size); err != nil { + return err + } + cur := sg.Addr + for i, run := range sg.Runs { + if i > 0 { + if _, err := w.Write([]byte{','}); err != nil { + return err + } + } + switch { + case run.Addr < cur: + return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, run.Addr, cur) + case run.Addr > cur: + if _, err := fmt.Fprintf(w, `{"Gap":%d},`, run.Addr.Sub(cur)); err != nil { + return err + } + fallthrough + default: + if err := lowmemjson.NewEncoder(w).Encode(run); err != nil { + return err + } + cur = run.Addr.Add(run.Size()) + } + } + end := sg.Addr.Add(sg.Size) + switch { + case end < cur: + return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, end, cur) + case end > cur: + if _, err := fmt.Fprintf(w, `,{"Gap":%d}`, end.Sub(cur)); err != nil { + return err + } + } + if _, err := w.Write([]byte("]}")); err != nil { + return err + } + return nil +} + +func (sg *SumRunWithGaps[Addr]) DecodeJSON(r io.RuneScanner) error { + *sg = SumRunWithGaps[Addr]{} + var name string + return lowmemjson.DecodeObject(r, + func(r io.RuneScanner) error { + return lowmemjson.NewDecoder(r).Decode(&name) + }, + func(r io.RuneScanner) error { + switch name { + case "Addr": + return lowmemjson.NewDecoder(r).Decode(&sg.Addr) + case "Size": + return lowmemjson.NewDecoder(r).Decode(&sg.Size) + case "Runs": + return lowmemjson.DecodeArray(r, func(r io.RuneScanner) error { + var run btrfssum.SumRun[Addr] + if err := lowmemjson.NewDecoder(r).Decode(&run); err != nil { + return err + } + if run.ChecksumSize > 0 { + sg.Runs = append(sg.Runs, run) + } + return nil + }) + default: + return fmt.Errorf("unknown key %q", name) + } + }) +} diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d index 9d14adf..9d14adf 100644 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 index 269f061..269f061 100644 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 index b8f1562..b8f1562 100644 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 index be67506..be67506 100644 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 diff --git a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d index c3bfa37..c3bfa37 100644 --- a/lib/diskio/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d diff --git a/lib/diskio/kmp.go b/lib/diskio/kmp.go deleted file mode 100644 index 6949aa4..0000000 --- a/lib/diskio/kmp.go +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package diskio - -import ( - "errors" - "io" -) - -var ErrWildcard = errors.New("wildcard") - -func kmpEq2[K ~int64, V comparable](aS Sequence[K, V], aI K, bS Sequence[K, V], bI K) bool { - aV, aErr := aS.Get(aI) - bV, bErr := bS.Get(bI) - if aErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if aErr == ErrWildcard || errors.Is(aErr, ErrWildcard) { - aV = bV - aErr = nil - } else { - panic(aErr) - } - } - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - bV = aV - bErr = nil - } else { - panic(bErr) - } - } - if aErr != nil || bErr != nil { - return false - } - return aV == bV -} - -func kmpEq1[K ~int64, V comparable](aV V, bS Sequence[K, V], bI K) bool { - bV, bErr := bS.Get(bI) - if bErr != nil { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if bErr == ErrWildcard || errors.Is(bErr, ErrWildcard) { - return true - } - panic(bErr) - } - return aV == bV -} - -// buildKMPTable takes the string 'substr', and returns a table such -// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and -// 'substr[:val] == substr[matchLen-val:matchLen]'. -func buildKMPTable[K ~int64, V comparable](substr Sequence[K, V]) ([]K, error) { - var substrLen K - for { - //nolint:errorlint // The == is just a fast-path; we still fall back to errors.Is. - if _, err := substr.Get(substrLen); err != nil && !(err == ErrWildcard || errors.Is(err, ErrWildcard)) { - if errors.Is(err, io.EOF) { - break - } - return nil, err - } - substrLen++ - } - - table := make([]K, substrLen) - for j := K(0); j < substrLen; j++ { - if j == 0 { - // First entry must always be 0 (in order to - // satisfy 'val < matchLen'). - continue - } - val := table[j-1] - // not a match; go back - for val > 0 && !kmpEq2(substr, j, substr, val) { - val = table[val-1] - } - // is a match; go forward - if kmpEq2(substr, val, substr, j) { - val++ - } - table[j] = val - } - return table, nil -} - -// IndexAll returns the starting-position of all possibly-overlapping -// occurrences of 'substr' in the 'str' sequence. -// -// Will hop around in 'substr', but will only get the natural sequence -// [0...) in order from 'str'. When hopping around in 'substr' it -// assumes that once it has gotten a given index without error, it can -// continue to do so without error; errors appearing later will cause -// panics. -// -// Will panic if the length of 'substr' is 0. -// -// The 'substr' may include wildcard characters by returning -// ErrWildcard for a position. -// -// Uses the Knuth-Morris-Pratt algorithm. -func IndexAll[K ~int64, V comparable](str, substr Sequence[K, V]) ([]K, error) { - table, err := buildKMPTable(substr) - if err != nil { - return nil, err - } - substrLen := K(len(table)) - if substrLen == 0 { - panic(errors.New("diskio.IndexAll: empty substring")) - } - - var matches []K - var curMatchBeg K - var curMatchLen K - - for pos := K(0); ; pos++ { - chr, err := str.Get(pos) - if err != nil { - if errors.Is(err, io.EOF) { - err = nil - } - return matches, err - } - - // Consider 'chr' - for curMatchLen > 0 && !kmpEq1(chr, substr, curMatchLen) { // shorten the match - overlap := table[curMatchLen-1] - curMatchBeg += curMatchLen - overlap - curMatchLen = overlap - } - if kmpEq1(chr, substr, curMatchLen) { // lengthen the match - if curMatchLen == 0 { - curMatchBeg = pos - } - curMatchLen++ - if curMatchLen == substrLen { - matches = append(matches, curMatchBeg) - overlap := table[curMatchLen-1] - curMatchBeg += curMatchLen - overlap - curMatchLen = overlap - } - } - } -} diff --git a/lib/diskio/seq.go b/lib/diskio/seq.go index 3c5f4ae..f8e6ea8 100644 --- a/lib/diskio/seq.go +++ b/lib/diskio/seq.go @@ -1,68 +1,43 @@ -// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> // // SPDX-License-Identifier: GPL-2.0-or-later package diskio -import ( - "fmt" - "io" -) - // interface ///////////////////////////////////////////////////////// -type Sequence[K ~int64, V any] interface { +type Sequence[K ~int64 | ~int, V any] interface { + SeqLen() K // Get the value at 'pos' in the sequence. Positions start at - // 0 and increment naturally. Return an error that is io.EOF - // if 'pos' is past the end of the sequence'. - Get(pos K) (V, error) + // 0 and increment naturally. It is invalid to call + // SeqGet(pos) with a pos that is >= SeqLen(). + SeqGet(pos K) V } // implementation: slice ///////////////////////////////////////////// -type SliceSequence[K ~int64, V any] []V +type SliceSequence[K ~int64 | ~int, V any] []V + +var _ Sequence[assertAddr, byte] = SliceSequence[assertAddr, byte](nil) -var _ Sequence[assertAddr, byte] = SliceSequence[assertAddr, byte]([]byte(nil)) +func (s SliceSequence[K, V]) SeqLen() K { + return K(len(s)) +} -func (s SliceSequence[K, V]) Get(i K) (V, error) { - if i >= K(len(s)) { - var v V - return v, io.EOF - } - return s[int(i)], nil +func (s SliceSequence[K, V]) SeqGet(i K) V { + return s[int(i)] } // implementation: string //////////////////////////////////////////// -type StringSequence[K ~int64] string +type StringSequence[K ~int64 | ~int] string var _ Sequence[assertAddr, byte] = StringSequence[assertAddr]("") -func (s StringSequence[K]) Get(i K) (byte, error) { - if i >= K(len(s)) { - return 0, io.EOF - } - return s[int(i)], nil +func (s StringSequence[K]) SeqLen() K { + return K(len(s)) } -// implementation: io.ByteReader ///////////////////////////////////// - -type ByteReaderSequence[K ~int64] struct { - R io.ByteReader - pos K -} - -var _ Sequence[assertAddr, byte] = &ByteReaderSequence[assertAddr]{R: nil} - -func (s *ByteReaderSequence[K]) Get(i K) (byte, error) { - if i != s.pos { - return 0, fmt.Errorf("%T.Get(%v): can only call .Get(%v)", - s, i, s.pos) - } - chr, err := s.R.ReadByte() - if err != nil { - return chr, err - } - s.pos++ - return chr, nil +func (s StringSequence[K]) SeqGet(i K) byte { + return s[int(i)] } |