From 28c5bd2ae25aa11502f4dcdcb379183d4229124f Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 26 Aug 2022 23:24:53 -0600 Subject: slooooowwwww --- .../btrfsinspect/rebuildmappings/fuzzymatchsums.go | 171 ++++++++++++++++++ .../btrfsinspect/rebuildmappings/matchsums.go | 193 --------------------- .../rebuildmappings/rebuildmappings.go | 13 +- 3 files changed, 180 insertions(+), 197 deletions(-) create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go (limited to 'lib/btrfsprogs/btrfsinspect/rebuildmappings') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go new file mode 100644 index 0000000..ea95dd4 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/fuzzymatchsums.go @@ -0,0 +1,171 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "sort" + "strconv" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" +) + +type fuzzyRecord struct { + PAddr btrfsvol.QualifiedPhysicalAddr + N int64 + D int64 +} + +func (r fuzzyRecord) Pct() float64 { + return float64(r.N) / float64(r.D) +} + +func (a fuzzyRecord) Cmp(b fuzzyRecord) int { + aF := 1.0 - a.Pct() + bF := 1.0 - b.Pct() + switch { + case aF < bF: + return -1 + case aF > bF: + return 1 + default: + return 0 + } +} + +func fuzzyMatchBlockGroupSums(ctx context.Context, + fs *btrfs.FS, + blockgroups map[btrfsvol.LogicalAddr]BlockGroup, + physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], + logicalSums btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr], +) error { + dlog.Info(ctx, "... Pairing up blockgroups and sums...") + bgSums := make(map[btrfsvol.LogicalAddr]btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]) + for i, bgLAddr := range maps.SortedKeys(blockgroups) { + blockgroup := blockgroups[bgLAddr] + runs := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size) + bgSums[blockgroup.LAddr] = runs + dlog.Infof(ctx, "... (%v/%v) blockgroup[laddr=%v] has %v runs covering %v%%", + i+1, len(blockgroups), bgLAddr, len(runs.Runs), int(100*runs.PctFull())) + } + dlog.Info(ctx, "... ... done pairing") + + dlog.Info(ctx, "... Searching for unmapped blockgroups in unmapped physical regions...") + regions := ListUnmappedPhysicalRegions(fs) + bgMatches := make(map[btrfsvol.LogicalAddr]btrfsvol.QualifiedPhysicalAddr) + for i, bgLAddr := range maps.SortedKeys(blockgroups) { + bgRun := bgSums[bgLAddr] + if len(bgRun.Runs) == 0 { + dlog.Errorf(ctx, "... (%v/%v) blockgroup[laddr=%v] can't be matched because it has 0 runs", + i+1, len(bgSums), bgLAddr) + continue + } + + best := lowestN[fuzzyRecord]{N: 5} + if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error { + for paddr := region.Addr; true; paddr += btrfssum.BlockSize { + n, d, err := pctMatch(ctx, region, paddr, bgRun) + if err != nil { + return err + } + if d == 0 { + break + } + best.Insert(fuzzyRecord{ + PAddr: btrfsvol.QualifiedPhysicalAddr{ + Dev: devID, + Addr: paddr, + }, + N: n, + D: d, + }) + } + return nil + }); err != nil { + return err + } + + var pcts []string + for _, r := range best.Dat { + pcts = append(pcts, strconv.Itoa(int(100*r.Pct()))+"%") + } + lvl := dlog.LogLevelError + if len(best.Dat) > 0 && best.Dat[0].Pct() > 0.5 { + bgMatches[bgLAddr] = best.Dat[0].PAddr + lvl = dlog.LogLevelInfo + } + dlog.Logf(ctx, lvl, "... (%v/%v) blockgroup[laddr=%v] best %d matches are: %v", + i+1, len(bgSums), bgLAddr, len(pcts), pcts) + } + dlog.Info(ctx, "... ... done searching") + + dlog.Info(ctx, "... Applying those mappings...") + for _, bgLAddr := range maps.SortedKeys(bgMatches) { + paddr := bgMatches[bgLAddr] + blockgroup := blockgroups[bgLAddr] + mapping := btrfsvol.Mapping{ + LAddr: blockgroup.LAddr, + PAddr: paddr, + Size: blockgroup.Size, + SizeLocked: true, + Flags: containers.Optional[btrfsvol.BlockGroupFlags]{ + OK: true, + Val: blockgroup.Flags, + }, + } + if err := fs.LV.AddMapping(mapping); err != nil { + dlog.Errorf(ctx, "... error: %v", err) + continue + } + delete(blockgroups, bgLAddr) + } + dlog.Info(ctx, "... ... done applying") + + return nil +} + +type lowestN[T containers.Ordered[T]] struct { + N int + Dat []T +} + +func (l *lowestN[T]) Insert(v T) { + if len(l.Dat) < l.N { + l.Dat = append(l.Dat, v) + } else if v.Cmp(l.Dat[0]) < 0 { + l.Dat[0] = v + } else { + return + } + sort.Slice(l.Dat, func(i, j int) bool { + return l.Dat[i].Cmp(l.Dat[j]) < 0 + }) +} + +func pctMatch( + ctx context.Context, + searchspace btrfssum.SumRun[btrfsvol.PhysicalAddr], start btrfsvol.PhysicalAddr, + pattern btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr], +) (n, d int64, err error) { + if psize := searchspace.Size() - start.Sub(searchspace.Addr); psize < pattern.Size { + return + } + err = pattern.Walk(ctx, func(laddr btrfsvol.LogicalAddr, lsum btrfssum.ShortSum) error { + d++ + paddr := start.Add(laddr.Sub(pattern.Addr)) + psum, _ := searchspace.SumForAddr(paddr) + if psum == lsum { + n++ + } + return nil + }) + return +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go index 6126820..b882c77 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/matchsums.go @@ -97,196 +97,3 @@ func matchBlockGroupSums(ctx context.Context, return nil } - -/* TODO - - dlog.Info(ctx, "Reverse-indexing remaining unmapped logical sums...") - sum2laddrs := make(map[btrfssum.ShortSum][]btrfsvol.LogicalAddr) - var numUnmappedBlocks int64 - if err := logicalSums.Walk(ctx, func(laddr btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { - var dat [btrfssum.BlockSize]byte - if _, err := fs.ReadAt(dat[:], laddr); err != nil { - if errors.Is(err, btrfsvol.ErrCouldNotMap) { - sum2laddrs[sum] = append(sum2laddrs[sum], laddr) - numUnmappedBlocks++ - return nil - } - return err - } - return nil - }); err != nil { - return err - } - dlog.Infof(ctx, "... done reverse-indexing; %v still unmapped logical sums", - numUnmappedBlocks) - - dlog.Info(ctx, "Cross-referencing sums to re-construct mappings...") - newMappings := &ExtentMappings{ - InLV: &fs.LV, - InBlockGroups: blockgroups, - InSums: sums, - InReverseSums: sum2laddrs, - } - regions := ListPhysicalRegions(fs) - for _, devID := range maps.SortedKeys(regions) { - if err := newMappings.ScanOneDevice(ctx, - devID, devs[devID].Name(), - regions[devID], - ); err != nil { - return err - } - } - dlog.Info(ctx, "... done cross-referencing") - - dlog.Info(ctx, "Applying those mappings...") - for laddr, mappings := range newMappings.OutSum2mappings { - if len(mappings) > 1 { - dlog.Errorf(ctx, "multiple possibilities for laddr=%v :", laddr) - for _, mapping := range mappings { - dlog.Errorf(ctx, " - %#v", *mapping) - } - continue - } - if err := fs.LV.AddMapping(*mappings[0]); err != nil { - dlog.Error(ctx, err) - } - } - dlog.Info(ctx, "... done applying") - - return nil -} - -type ExtentMappings struct { - // input - InLV *btrfsvol.LogicalVolume[*btrfs.Device] - InBlockGroups *BlockGroupTree - InSums AllSums - InReverseSums map[ShortSum][]btrfsvol.LogicalAddr - - // state - internedMappings map[btrfsvol.Mapping]*btrfsvol.Mapping - - // output - OutSum2mappings map[ShortSum][]*btrfsvol.Mapping -} - -func (em *ExtentMappings) considerMapping(ctx context.Context, laddr btrfsvol.LogicalAddr, paddr btrfsvol.QualifiedPhysicalAddr) (btrfsvol.Mapping, bool) { - blockgroup := LookupBlockGroup(em.InBlockGroups, laddr, btrfssum.BlockSize) - if blockgroup == nil { - return btrfsvol.Mapping{ - LAddr: laddr, - PAddr: paddr, - Size: btrfssum.BlockSize, - }, true - } - mapping := btrfsvol.Mapping{ - LAddr: blockgroup.LAddr, - PAddr: btrfsvol.QualifiedPhysicalAddr{ - Dev: paddr.Dev, - Addr: paddr.Addr.Add(laddr.Sub(blockgroup.LAddr)), - }, - Size: blockgroup.Size, - SizeLocked: true, - Flags: containers.Optional[btrfsvol.BlockGroupFlags]{ - OK: true, - Val: blockgroup.Flags, - }, - } - if !em.InLV.CouldAddMapping(mapping) { - return btrfsvol.Mapping{}, false - } - - for offset := btrfsvol.AddrDelta(0); offset <= mapping.Size; offset += btrfssum.BlockSize { - expCSum, ok := em.InSums.SumForLAddr(mapping.LAddr.Add(offset)) - if !ok { - continue - } - actCSum, _ := em.InSums.SumForPAddr(mapping.PAddr.Add(offset)) - if actCSum != expCSum { - return btrfsvol.Mapping{}, false - } - } - return mapping, true -} - -func (em *ExtentMappings) addMapping(sum ShortSum, mapping btrfsvol.Mapping) { - interned := em.internedMappings[mapping] - if interned == nil { - interned = &mapping - em.internedMappings[mapping] = interned - } - - em.OutSum2mappings[sum] = append(em.OutSum2mappings[sum], interned) -} - -func (em *ExtentMappings) ScanOneDevice( - ctx context.Context, - devID btrfsvol.DeviceID, devName string, - regions []PhysicalRegion, -) error { - if em.internedMappings == nil { - em.internedMappings = make(map[btrfsvol.Mapping]*btrfsvol.Mapping) - } - if em.OutSum2mappings == nil { - em.OutSum2mappings = make(map[ShortSum][]*btrfsvol.Mapping) - } - - dlog.Infof(ctx, "... dev[%q] Scanning for extents...", devName) - - var totalMappings int - _ = WalkRegions(ctx, regions, btrfssum.BlockSize, - func(_, _ int64) {}, - func(paddr btrfsvol.PhysicalAddr) error { - qpaddr := btrfsvol.QualifiedPhysicalAddr{ - Dev: devID, - Addr: paddr, - } - sum, _ := em.InSums.SumForPAddr(qpaddr) - totalMappings += len(em.InReverseSums[sum]) - return nil - }, - ) - - lastProgress := -1 - considered := 0 - accepted := 0 - progress := func() { - pct := int(100 * 10000 * float64(considered) / float64(totalMappings)) - if pct != lastProgress || considered == totalMappings { - dlog.Infof(ctx, "... dev[%q] scanned %v%% (considered %v/%v pairings, accepted %v)", - devName, float64(pct)/10000.0, considered, totalMappings, accepted) - lastProgress = pct - } - } - return WalkRegions(ctx, regions, btrfssum.BlockSize, - func(_, _ int64) { - progress() - }, - func(paddr btrfsvol.PhysicalAddr) error { - qpaddr := btrfsvol.QualifiedPhysicalAddr{ - Dev: devID, - Addr: paddr, - } - sum, _ := em.InSums.SumForPAddr(qpaddr) - for i, laddr := range em.InReverseSums[sum] { - if i%100 == 0 { - if err := ctx.Err(); err != nil { - return err - } - } - mapping, ok := em.considerMapping(ctx, laddr, qpaddr) - considered++ - if !ok { - continue - } - em.addMapping(sum, mapping) - accepted++ - progress() - } - - return nil - }, - ) -} - -*/ diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go index 15cdf11..4d7c112 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go @@ -51,8 +51,8 @@ func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect dlog.Infof(ctx, "plan: 2/6 process %d device extents", numDevExts) dlog.Infof(ctx, "plan: 3/6 process %d nodes", numNodes) dlog.Infof(ctx, "plan: 4/6 process %d block groups", numBlockGroups) - dlog.Infof(ctx, "plan: 5/6 process sums of block groups") - dlog.Infof(ctx, "plan: 6/6 process remaining sums") + dlog.Infof(ctx, "plan: 5/6 search for block groups in checksum map (exact)") + dlog.Infof(ctx, "plan: 6/6 search for block groups in checksum map (fuzzy)") dlog.Infof(ctx, "1/6: Processing %d chunks...", numChunks) for _, devID := range devIDs { @@ -143,14 +143,19 @@ func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect } dlog.Info(ctx, "... done processing block groups") - dlog.Infof(ctx, "5/6: Processing sums of %d block groups", len(bgs)) + dlog.Infof(ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs)) physicalSums := ExtractPhysicalSums(scanResults) logicalSums := ExtractLogicalSums(ctx, scanResults) if err := matchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil { return err } + dlog.Info(ctx, "... done searching for exact block groups") - dlog.Infof(ctx, "6/6: process remaining sums: TODO") + dlog.Infof(ctx, "6/6: Searching for %d block groups in checksum map (fuzzy)...", len(bgs)) + if err := fuzzyMatchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil { + return err + } + dlog.Info(ctx, "... done searching for fuzzy block groups") dlog.Info(ctx, "report:") p := message.NewPrinter(message.MatchLanguage("en")) -- cgit v1.2.3-2-g168b