From ecb21f33362367d44215af73f060f32485155893 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 29 Aug 2022 22:42:12 -0600 Subject: wip --- .../btrfsinspect/rebuildnodes/rebuildnodes.go | 3 +- .../btrfsinspect/rebuildnodes/s2_lostandfound.go | 36 ++++++------- .../btrfsinspect/rebuildnodes/s3_reinit.go | 63 ++++++++++++++-------- .../btrfsinspect/rebuildnodes/s4_reattach.go | 41 +++++++++++--- 4 files changed, 93 insertions(+), 50 deletions(-) (limited to 'lib/btrfsprogs') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuildnodes.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuildnodes.go index e5365c9..941ea4c 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuildnodes.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/rebuildnodes.go @@ -41,11 +41,9 @@ func RebuildNodes(ctx context.Context, fs *btrfs.FS, nodeScanResults btrfsinspec return nil, err } - dlog.Info(ctx, "Attaching lost+found nodes to rebuilt nodes...") if err := reAttachNodes(ctx, nfs, foundRoots, rebuiltNodes); err != nil { return nil, err } - dlog.Info(ctx, "... done attaching") return rebuiltNodes, nil } @@ -149,5 +147,6 @@ func getChunkTreeUUID(ctx context.Context, fs _FS) (btrfsprim.UUID, bool) { type RebuiltNode struct { Err string MinKey, MaxKey btrfsprim.Key + InTrees containers.Set[btrfsprim.ObjID] btrfstree.Node } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s2_lostandfound.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s2_lostandfound.go index 3fc361c..4efaeab 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s2_lostandfound.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s2_lostandfound.go @@ -35,18 +35,19 @@ func lostAndFoundNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect } } - attachedNodes := make(map[btrfsvol.LogicalAddr]struct{}) + visitedNodes := make(map[btrfsvol.LogicalAddr]struct{}) btrfsutil.WalkAllTrees(ctx, fs, btrfsutil.WalkAllTreesHandler{ TreeWalkHandler: btrfstree.TreeWalkHandler{ + // Don't use `PreNode` because we don't want to run this on bad nodes. Node: func(path btrfstree.TreePath, _ *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]) error { addr := path.Node(-1).ToNodeAddr - if _, alreadyVisited := attachedNodes[addr]; alreadyVisited { + if _, alreadyVisited := visitedNodes[addr]; alreadyVisited { // Can happen because of COW subvolumes; // this is really a DAG not a tree. return iofs.SkipDir } - attachedNodes[addr] = struct{}{} - progress(len(attachedNodes)) + visitedNodes[addr] = struct{}{} + progress(len(visitedNodes)) return nil }, }, @@ -61,17 +62,17 @@ func lostAndFoundNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect orphanedNodes := make(map[btrfsvol.LogicalAddr]int) for _, devResults := range nodeScanResults { for laddr := range devResults.FoundNodes { - if _, attached := attachedNodes[laddr]; !attached { + if _, attached := visitedNodes[laddr]; !attached { orphanedNodes[laddr] = 0 } } } - if len(attachedNodes)+len(orphanedNodes) != total { + if len(visitedNodes)+len(orphanedNodes) != total { panic("should not happen") } dlog.Infof(ctx, "... (finished processing %v attached nodes, proceeding to process %v lost nodes, for a total of %v)", - len(attachedNodes), len(orphanedNodes), len(attachedNodes)+len(orphanedNodes)) + len(visitedNodes), len(orphanedNodes), len(visitedNodes)+len(orphanedNodes)) // 'orphanedRoots' is a subset of 'orphanedNodes'; start with // it as the complete orphanedNodes, and then remove entries. @@ -79,10 +80,7 @@ func lostAndFoundNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect for node := range orphanedNodes { orphanedRoots[node] = struct{}{} } - done := len(attachedNodes) for potentialRoot := range orphanedNodes { - done++ - progress(done) if orphanedNodes[potentialRoot] > 1 { continue } @@ -91,23 +89,25 @@ func lostAndFoundNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect // do nothing }, btrfstree.TreeWalkHandler{ - PreNode: func(path btrfstree.TreePath) error { - nodeAddr := path.Node(-1).ToNodeAddr - if nodeAddr != potentialRoot { - delete(orphanedRoots, nodeAddr) + // Don't use `PreNode` because we don't want to run this on bad + // nodes (it'd screw up `len(visitedNodes)`). + Node: func(path btrfstree.TreePath, _ *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]) error { + addr := path.Node(-1).ToNodeAddr + if addr != potentialRoot { + delete(orphanedRoots, addr) } - visitCnt, ok := orphanedNodes[nodeAddr] - if visitCnt > 0 || !ok { + if _, alreadyVisited := visitedNodes[addr]; alreadyVisited { return iofs.SkipDir } - orphanedNodes[nodeAddr] = visitCnt + 1 + visitedNodes[addr] = struct{}{} + progress(len(visitedNodes)) return nil }, }, ) } - if done != total { + if len(visitedNodes) != total { panic("should not happen") } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s3_reinit.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s3_reinit.go index eb49435..49bc989 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s3_reinit.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s3_reinit.go @@ -8,13 +8,16 @@ import ( "context" "fmt" iofs "io/fs" + "reflect" "github.com/datawire/dlib/dlog" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsutil" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/diskio" ) @@ -33,8 +36,7 @@ func reInitBrokenNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect lastPct := -1 total := countNodes(nodeScanResults) - done := 0 - progress := func() { + progress := func(done int) { pct := int(100 * float64(done) / float64(total)) if pct != lastPct || done == total { dlog.Infof(ctx, "... %v%% (%v/%v)", @@ -54,29 +56,44 @@ func reInitBrokenNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect return iofs.SkipDir } visitedNodes[addr] = struct{}{} - done++ - progress() + progress(len(visitedNodes)) return nil }, - BadNode: func(path btrfstree.TreePath, node *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node], err error) error { - min, max := spanOfTreePath(fs, path) - rebuiltNodes[path.Node(-1).ToNodeAddr] = &RebuiltNode{ - Err: err.Error(), - MinKey: min, - MaxKey: max, - Node: btrfstree.Node{ - Size: sb.NodeSize, - ChecksumType: sb.ChecksumType, - Head: btrfstree.NodeHeader{ - MetadataUUID: sb.EffectiveMetadataUUID(), - Addr: path.Node(-1).ToNodeAddr, - ChunkTreeUUID: chunkTreeUUID, - Owner: path.Node(-1).FromTree, // FIXME: handle it being a child tree? - Generation: path.Node(-1).FromGeneration, - Level: path.Node(-1).ToNodeLevel, - }, + BadNode: func(path btrfstree.TreePath, _ *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node], err error) error { + node := btrfstree.Node{ + Size: sb.NodeSize, + ChecksumType: sb.ChecksumType, + Head: btrfstree.NodeHeader{ + MetadataUUID: sb.EffectiveMetadataUUID(), + Addr: path.Node(-1).ToNodeAddr, + ChunkTreeUUID: chunkTreeUUID, + //Owner: TBD, // see RebuiltNode.InTrees + Generation: path.Node(-1).FromGeneration, + Level: path.Node(-1).ToNodeLevel, }, } + min, max := spanOfTreePath(fs, path) + if other, ok := rebuiltNodes[path.Node(-1).ToNodeAddr]; ok { + if !reflect.DeepEqual(other.Node, node) { + dlog.Errorf(ctx, "... mismatch: %v != %v", node, other.Node) + return err + } + if min.Cmp(other.MinKey) > 0 { // if min > other.MinKey { + other.MinKey = min // take the max of the two + } + if max.Cmp(other.MaxKey) < 0 { // if max < other.MaxKey { + other.MaxKey = max // take the min of the two + } + other.InTrees.Insert(path.Node(-1).FromTree) + } else { + rebuiltNodes[path.Node(-1).ToNodeAddr] = &RebuiltNode{ + Err: err.Error(), + MinKey: min, + MaxKey: max, + InTrees: containers.Set[btrfsprim.ObjID]{path.Node(-1).FromTree: struct{}{}}, + Node: node, + } + } return err }, } @@ -85,7 +102,7 @@ func reInitBrokenNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect // nodeScanResults so that we don't need to specifically check // if any of the root nodes referenced directly by the // superblock are dead. - progress() + progress(len(visitedNodes)) btrfsutil.WalkAllTrees(ctx, fs, btrfsutil.WalkAllTreesHandler{ Err: func(err *btrfsutil.WalkError) { // do nothing @@ -100,7 +117,7 @@ func reInitBrokenNodes(ctx context.Context, fs _FS, nodeScanResults btrfsinspect walkHandler) } - if done != total { + if len(visitedNodes) != total { panic("should not happen") } diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s4_reattach.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s4_reattach.go index 8f3553e..32269fc 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/s4_reattach.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/s4_reattach.go @@ -14,20 +14,30 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" "git.lukeshu.com/btrfs-progs-ng/lib/slices" ) func reAttachNodes(ctx context.Context, fs _FS, foundRoots map[btrfsvol.LogicalAddr]struct{}, rebuiltNodes map[btrfsvol.LogicalAddr]*RebuiltNode) error { + dlog.Info(ctx, "Attaching lost+found nodes to rebuilt nodes...") + + sb, err := fs.Superblock() + if err != nil { + return err + } + // Index 'rebuiltNodes' for fast lookups. + dlog.Info(ctx, "... indexing rebuilt nodes...") gaps := make(map[btrfsprim.ObjID]map[uint8][]*RebuiltNode) maxLevel := make(map[btrfsprim.ObjID]uint8) for _, node := range rebuiltNodes { - maxLevel[node.Head.Owner] = slices.Max(maxLevel[node.Head.Owner], node.Head.Level) - - if gaps[node.Head.Owner] == nil { - gaps[node.Head.Owner] = make(map[uint8][]*RebuiltNode) + for treeID := range node.InTrees { + maxLevel[treeID] = slices.Max(maxLevel[treeID], node.Head.Level) + if gaps[treeID] == nil { + gaps[treeID] = make(map[uint8][]*RebuiltNode) + } + gaps[treeID][node.Head.Level] = append(gaps[treeID][node.Head.Level], node) } - gaps[node.Head.Owner][node.Head.Level] = append(gaps[node.Head.Owner][node.Head.Level], node) } for _, byTreeID := range gaps { for _, slice := range byTreeID { @@ -36,10 +46,22 @@ func reAttachNodes(ctx context.Context, fs _FS, foundRoots map[btrfsvol.LogicalA }) } } + dlog.Info(ctx, "... done indexing") // Attach foundRoots to the gaps. - sb, _ := fs.Superblock() - for foundLAddr := range foundRoots { + dlog.Info(ctx, "... attaching nodes...") + lastPct := -1 + progress := func(done int) { + pct := int(100 * float64(done) / float64(len(foundRoots))) + if pct != lastPct || done == len(foundRoots) { + dlog.Infof(ctx, "... %v%% (%v/%v)", + pct, done, len(foundRoots)) + lastPct = pct + } + } + numAttached := 0 + for i, foundLAddr := range maps.SortedKeys(foundRoots) { + progress(i) foundRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](fs, *sb, foundLAddr, btrfstree.NodeExpectations{ LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: foundLAddr}, }) @@ -84,12 +106,17 @@ func reAttachNodes(ctx context.Context, fs _FS, foundRoots map[btrfsvol.LogicalA Generation: foundRef.Data.Head.Generation, }) attached = true + numAttached++ } if !attached { dlog.Errorf(ctx, "could not find a broken node to attach node to reattach node@%v to", foundRef.Addr) } } + progress(len(foundRoots)) + dlog.Info(ctx, "... ... done attaching") + dlog.Infof(ctx, "... re-attached %d nodes (%v%% success rate)", + numAttached, int(100*float64(numAttached)/float64(len(foundRoots)))) return nil } -- cgit v1.2.3-2-g168b