From 51ab81566f946608822983b5a1cf133fb9fc19bf Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 25 Aug 2022 00:10:09 -0600 Subject: discard old sums --- .../btrfsinspect/rebuildmappings/logicalsums.go | 164 +++++++++++++++------ 1 file changed, 123 insertions(+), 41 deletions(-) diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go index 3430502..1a93a26 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/logicalsums.go @@ -6,64 +6,146 @@ package rebuildmappings import ( "context" + "sort" "strings" "github.com/datawire/dlib/dlog" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" - "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/slices" ) func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] { - dlog.Info(ctx, "... Mapping the logical address space...") - type record struct { - Gen btrfs.Generation - Sum btrfssum.ShortSum - } - addrspace := make(map[btrfsvol.LogicalAddr]record) - var sumSize int + var records []btrfsinspect.SysExtentCSum for _, devResults := range scanResults { - sumSize = devResults.Checksums.ChecksumSize - for _, sumItem := range devResults.FoundExtentCSums { - _ = sumItem.Sums.Walk(ctx, func(pos btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { - new := record{ - Gen: sumItem.Generation, - Sum: sum, - } - if old, ok := addrspace[pos]; ok { - switch { - case old.Gen > new.Gen: - // do nothing - case old.Gen < new.Gen: - addrspace[pos] = new - case old.Gen == new.Gen: - if old != new { - dlog.Errorf(ctx, "mismatch of laddr=%v sum: %v != %v", pos, old, new) - } - } - } else { - addrspace[pos] = new + records = append(records, devResults.FoundExtentCSums...) + } + // Sort higher-generation items earlier; then sort by addr. + sort.Slice(records, func(i, j int) bool { + a, b := records[i], records[j] + switch { + case a.Generation > b.Generation: + return true + case a.Generation < b.Generation: + return false + default: + return a.Sums.Addr < b.Sums.Addr + } + }) + if len(records) == 0 { + return btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr]{} + } + sumSize := records[0].Sums.ChecksumSize + + // Now build them in to a coherent address space. If an item + // conflicts with a previously added item, ignore it (let + // higher generations win; we already sorted to have higher + // generations sorted first). + addrspace := &containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum]{ + KeyFn: func(item btrfsinspect.SysExtentCSum) containers.NativeOrdered[btrfsvol.LogicalAddr] { + return containers.NativeOrdered[btrfsvol.LogicalAddr]{Val: item.Sums.Addr} + }, + } + for _, newRecord := range records { + for { + conflict := addrspace.Search(func(oldRecord btrfsinspect.SysExtentCSum) int { + switch { + case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) <= oldRecord.Sums.Addr: + // 'newRecord' is wholly to the left of 'oldRecord'. + return -1 + case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) <= newRecord.Sums.Addr: + // 'oldRecord' is wholly to the left of 'newRecord'. + return 1 + default: + // There is some overlap. + return 0 } - return nil }) + if conflict == nil { + // We can insert it + addrspace.Insert(newRecord) + break + } + oldRecord := conflict.Value + if oldRecord == newRecord { + // Duplicates are to be expected. + break + } + if oldRecord.Generation > newRecord.Generation { + // Newer generation wins. + break + } + if oldRecord.Generation < newRecord.Generation { + // We sorted them! This shouldn't happen. + panic("should not happen") + } + // Since sums are stored multiple times (RAID?), but may + // be split between items differently between copies, we + // should take the union (after verifying that they + // agree on the overlapping part). + overlapBeg := slices.Max( + oldRecord.Sums.Addr, + newRecord.Sums.Addr) + overlapEnd := slices.Min( + oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()), + newRecord.Sums.Addr.Add(newRecord.Sums.Size())) + + oldOverlapBeg := int(overlapBeg.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + oldOverlapEnd := int(overlapEnd.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + oldOverlap := oldRecord.Sums.Sums[oldOverlapBeg:oldOverlapEnd] + + newOverlapBeg := int(overlapBeg.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + newOverlapEnd := int(overlapEnd.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + newOverlap := newRecord.Sums.Sums[newOverlapBeg:newOverlapEnd] + + if oldOverlap != newOverlap { + dlog.Errorf(ctx, "mismatch: {gen:%v, addr:%v, size:%v} conflicts with {gen:%v, addr:%v, size:%v}", + oldRecord.Generation, oldRecord.Sums.Addr, oldRecord.Sums.Size(), + newRecord.Generation, newRecord.Sums.Addr, newRecord.Sums.Size()) + break + } + // OK, we match, so take the union. + var prefix, suffix btrfssum.ShortSum + switch { + case oldRecord.Sums.Addr < overlapBeg: + prefix = oldRecord.Sums.Sums[:oldOverlapBeg] + case newRecord.Sums.Addr < overlapBeg: + prefix = newRecord.Sums.Sums[:newOverlapBeg] + } + switch { + case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) > overlapEnd: + suffix = oldRecord.Sums.Sums[oldOverlapEnd:] + case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) > overlapEnd: + suffix = newRecord.Sums.Sums[newOverlapEnd:] + } + unionRecord := btrfsinspect.SysExtentCSum{ + Generation: oldRecord.Generation, + Sums: btrfsitem.ExtentCSum{ + SumRun: btrfssum.SumRun[btrfsvol.LogicalAddr]{ + ChecksumSize: oldRecord.Sums.ChecksumSize, + Addr: slices.Min(oldRecord.Sums.Addr, newRecord.Sums.Addr), + Sums: prefix + oldOverlap + suffix, + }, + }, + } + addrspace.Delete(containers.NativeOrdered[btrfsvol.LogicalAddr]{Val: oldRecord.Sums.Addr}) + newRecord = unionRecord + // loop around to check for more conflicts } } - dlog.Info(ctx, "... ... done mapping") + // Now flatten that RBTree in to a SumRunWithGaps. var flattened btrfssum.SumRunWithGaps[btrfsvol.LogicalAddr] - if len(addrspace) == 0 { - return flattened - } - - dlog.Info(ctx, "... Flattening the map ...") var curAddr btrfsvol.LogicalAddr var curSums strings.Builder - for _, laddr := range maps.SortedKeys(addrspace) { - if laddr != curAddr+(btrfsvol.LogicalAddr(curSums.Len()/sumSize)*btrfssum.BlockSize) { + _ = addrspace.Walk(func(node *containers.RBNode[btrfsinspect.SysExtentCSum]) error { + curEnd := curAddr + (btrfsvol.LogicalAddr(curSums.Len()/sumSize) * btrfssum.BlockSize) + if node.Value.Sums.Addr != curEnd { if curSums.Len() > 0 { flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{ ChecksumSize: sumSize, @@ -71,11 +153,12 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice Sums: btrfssum.ShortSum(curSums.String()), }) } - curAddr = laddr + curAddr = node.Value.Sums.Addr curSums.Reset() } - curSums.WriteString(string(addrspace[laddr].Sum)) - } + curSums.WriteString(string(node.Value.Sums.Sums)) + return nil + }) if curSums.Len() > 0 { flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{ ChecksumSize: sumSize, @@ -87,7 +170,6 @@ func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevice last := flattened.Runs[len(flattened.Runs)-1] end := last.Addr.Add(last.Size()) flattened.Size = end.Sub(flattened.Addr) - dlog.Info(ctx, "... ... done flattening") return flattened } -- cgit v1.2.3-2-g168b