From af795797565e642a9e7fd2a962add8e8b4d5756b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 21 Dec 2022 01:11:05 -0700 Subject: rebuildnodes/btrees: Implement a new rebuilt-trees system This will replace its use of btrfsutil.NewBrokenTrees. --- .../rebuildnodes/btrees/rebuilt_btrees.go | 343 +++++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go new file mode 100644 index 0000000..28d93b9 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -0,0 +1,343 @@ +// Copyright (C) 2022 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrees + +import ( + "context" + "fmt" + "time" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +type itemPtr struct { + Node btrfsvol.LogicalAddr + Idx int +} + +type rebuiltTree struct { + // static + ID btrfsprim.ObjID + UUID btrfsprim.UUID + Parent *rebuiltTree + + // mutable + Roots containers.Set[btrfsvol.LogicalAddr] + Items containers.SortedMap[btrfsprim.Key, itemPtr] +} + +// isOwnerOK returns whether it is permissible for a node with +// .Head.Owner=owner to be in this tree. +func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { + for { + if t == nil { + return false + } + if owner == t.ID { + return true + } + t = t.Parent + } +} + +// RebuiltTrees is an abstraction for rebuilding and accessing +// potentially broken btrees. +// +// It is conceptually a btrfstree.TreeOperator, and adds similar +// broken-tree handling to btrfsutil.BrokenTrees. However, the API is +// different thant btrfstree.TreeOperator, and is much more efficient +// than btrfsutil.BrokenTrees. +// +// The efficiency improvements are possible because of the API +// differences, which are necessary for how it is used in +// rebuildnodes: +// +// - it consumes an already-read graph.Graph instead of reading the +// graph itself +// +// - it does not use `btrfstree.TreePath` +// +// - it does not keep track of errors encountered in a tree +// +// A zero RebuiltTrees is invalid; it must be initialized with +// NewRebuiltTrees(). +type RebuiltTrees struct { + // static + rawFile diskio.File[btrfsvol.LogicalAddr] + sb btrfstree.Superblock + graph graph.Graph + + // static callbacks + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool) + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) + + // mutable + trees map[btrfsprim.ObjID]*rebuiltTree +} + +// NewRebuiltTrees returns a new RebuiltTrees instance. All of the +// callbacks must be non-nil. +func NewRebuiltTrees( + file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph graph.Graph, + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool), + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), +) *RebuiltTrees { + return &RebuiltTrees{ + rawFile: file, + sb: sb, + graph: graph, + + cbAddedItem: cbAddedItem, + cbLookupRoot: cbLookupRoot, + cbLookupUUID: cbLookupUUID, + + trees: make(map[btrfsprim.ObjID]*rebuiltTree), + } +} + +func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] { + graphInfo, ok := ts.graph.Nodes[laddr] + if !ok { + panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr)) + } + ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{ + LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, + Level: containers.Optional[uint8]{OK: true, Val: graphInfo.Level}, + Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: graphInfo.Generation}, + Owner: func(treeID btrfsprim.ObjID) error { + if treeID != graphInfo.Owner { + return fmt.Errorf("expected owner=%v but claims to have owner=%v", + graphInfo.Owner, treeID) + } + return nil + }, + MinItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MinItem}, + MaxItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MaxItem}, + }) + if err != nil { + panic(fmt.Errorf("should not happen: i/o error: %w", err)) + } + return ref +} + +func walk(graph graph.Graph, root btrfsvol.LogicalAddr, visited containers.Set[btrfsvol.LogicalAddr], fn func(btrfsvol.LogicalAddr) bool) { + if _, ok := graph.Nodes[root]; !ok { + return + } + if visited.Has(root) { + return + } + defer visited.Insert(root) + if !fn(root) { + return + } + for _, kp := range graph.EdgesFrom[root] { + walk(graph, kp.ToNode, visited, fn) + } +} + +type rootStats struct { + TreeID btrfsprim.ObjID + RootNode btrfsvol.LogicalAddr + + VisitedNodes int + TotalNodes int + AddedItems int +} + +func (s rootStats) String() string { + return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)", + s.TreeID, s.RootNode, + int(100*float64(s.VisitedNodes)/float64(s.TotalNodes)), + s.VisitedNodes, s.TotalNodes, + s.AddedItems) +} + +// AddRoot adds an additional root node to an existing tree. It is +// useful to call .AddRoot() to re-attach part of the tree that has +// been broken off. +// +// It is invalid (panic) to call AddRoot for a tree without having +// called AddTree first. +func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, rootNode btrfsvol.LogicalAddr) { + tree := ts.trees[treeID] + tree.Roots.Insert(rootNode) + + visited := make(containers.Set[btrfsvol.LogicalAddr]) + progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second) + numAdded := 0 + progress := func() { + progressWriter.Set(rootStats{ + TreeID: treeID, + RootNode: rootNode, + VisitedNodes: len(visited), + TotalNodes: len(ts.graph.Nodes), + AddedItems: numAdded, + }) + } + progress() + walk(ts.graph, rootNode, visited, func(node btrfsvol.LogicalAddr) bool { + progress() + if !tree.isOwnerOK(ts.graph.Nodes[node].Owner) { + return false + } + if ts.graph.Nodes[node].Level == 0 { + for i, item := range ts.readNode(node).Data.BodyLeaf { + if _, exists := tree.Items.Load(item.Key); exists { + // This is a panic because I'm not really sure what the best way to + // handle this is, and so if this happens I want the program to crash + // and force me to figure out how to handle it. + panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID)) + } + tree.Items.Store(item.Key, itemPtr{ + Node: node, + Idx: i, + }) + numAdded++ + ts.cbAddedItem(ctx, treeID, item.Key) + } + } + return true + }) + progress() + progressWriter.Done() +} + +// AddTree initializes the given tree, returning true if it was able +// to do so, or false if there was a problem and nothing was done. +// The tree is initialized with the normal root node of the tree. +// +// Subsequent calls to AddTree for the same tree are no-ops. +func (ts *RebuiltTrees) AddTree(ctx context.Context, treeID btrfsprim.ObjID) (ok bool) { + return ts.addTree(ctx, treeID, nil) +} + +func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, stack []btrfsprim.ObjID) (ok bool) { + if _, ok := ts.trees[treeID]; ok { + return true + } + if slices.Contains(treeID, stack) { + return false + } + tree := &rebuiltTree{ + ID: treeID, + Roots: make(containers.Set[btrfsvol.LogicalAddr]), + } + var root btrfsvol.LogicalAddr + switch treeID { + case btrfsprim.ROOT_TREE_OBJECTID: + root = ts.sb.RootTree + case btrfsprim.CHUNK_TREE_OBJECTID: + root = ts.sb.ChunkTree + case btrfsprim.TREE_LOG_OBJECTID: + root = ts.sb.LogTree + case btrfsprim.BLOCK_GROUP_TREE_OBJECTID: + root = ts.sb.BlockGroupRoot + default: + stack := append(stack, treeID) + if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { + return false + } + rootItem, ok := ts.cbLookupRoot(ctx, treeID) + if !ok { + return false + } + root = rootItem.ByteNr + tree.UUID = rootItem.UUID + if rootItem.ParentUUID != (btrfsprim.UUID{}) { + if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { + return false + } + parentID, ok := ts.cbLookupUUID(ctx, rootItem.ParentUUID) + if !ok { + return false + } + if !ts.addTree(ctx, parentID, append(stack, treeID)) { + return false + } + tree.Parent = ts.trees[parentID] + } + } + ts.trees[treeID] = tree + if root != 0 { + ts.AddRoot(ctx, treeID, root) + } + return true +} + +// Load reads an item from a tree. +// +// It is not nescessary to call AddTree for that tree first; Load will +// call it for you. +func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key btrfsprim.Key) (item btrfsitem.Item, ok bool) { + if !ts.AddTree(ctx, treeID) { + return nil, false + } + ptr, ok := ts.trees[treeID].Items.Load(key) + if !ok { + return nil, false + } + return ts.readNode(ptr.Node).Data.BodyLeaf[ptr.Idx].Body, true +} + +// Search searches for an item from a tree. +// +// It is not nescessary to call AddTree for that tree first; Search +// will call it for you. +func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) (key btrfsprim.Key, ok bool) { + if !ts.AddTree(ctx, treeID) { + return btrfsprim.Key{}, false + } + k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ itemPtr) int { + return fn(k) + }) + return k, ok +} + +// Search searches for a range of items from a tree. +// +// It is not nescessary to call AddTree for that tree first; SearchAll +// will call it for you. +func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) []btrfsprim.Key { + if !ts.AddTree(ctx, treeID) { + return nil + } + kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ itemPtr) int { + return fn(k) + }) + if len(kvs) == 0 { + return nil + } + ret := make([]btrfsprim.Key, len(kvs)) + for i := range kvs { + ret[i] = kvs[i].K + } + return ret +} + +// ListRoots returns a listing of all initialized trees and their root +// nodes. +// +// Do not mutate the set of roots for a tree; it is a pointer to the +// RebuiltTrees' internal set! +func (ts *RebuiltTrees) ListRoots() map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] { + ret := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], len(ts.trees)) + for treeID := range ts.trees { + ret[treeID] = ts.trees[treeID].Roots + } + return ret +} -- cgit v1.2.3-2-g168b From 3f88bf0926046b245844a72a2a0c97654a410f0a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 21 Dec 2022 02:41:45 -0700 Subject: rebuildnodes/btrees: Cache nodes --- .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 28d93b9..dea7ec3 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -85,7 +85,8 @@ type RebuiltTrees struct { cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) // mutable - trees map[btrfsprim.ObjID]*rebuiltTree + trees map[btrfsprim.ObjID]*rebuiltTree + nodeCache *containers.LRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]] } // NewRebuiltTrees returns a new RebuiltTrees instance. All of the @@ -105,15 +106,21 @@ func NewRebuiltTrees( cbLookupRoot: cbLookupRoot, cbLookupUUID: cbLookupUUID, - trees: make(map[btrfsprim.ObjID]*rebuiltTree), + trees: make(map[btrfsprim.ObjID]*rebuiltTree), + nodeCache: containers.NewLRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]](8), } } func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] { + if cached, ok := ts.nodeCache.Get(laddr); ok { + return cached + } + graphInfo, ok := ts.graph.Nodes[laddr] if !ok { panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr)) } + ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{ LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, Level: containers.Optional[uint8]{OK: true, Val: graphInfo.Level}, @@ -131,6 +138,9 @@ func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvo if err != nil { panic(fmt.Errorf("should not happen: i/o error: %w", err)) } + + ts.nodeCache.Add(laddr, ref) + return ref } -- cgit v1.2.3-2-g168b From 083444569ac882c470da8adee8059989ba176553 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 21 Dec 2022 03:59:49 -0700 Subject: rebuildnodes/btrees: Index all (orphaned) leafs --- .../rebuildnodes/btrees/rebuilt_btrees.go | 195 +++++++++++++++------ 1 file changed, 138 insertions(+), 57 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index dea7ec3..6e68a84 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -15,9 +15,10 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/diskio" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" "git.lukeshu.com/btrfs-progs-ng/lib/slices" "git.lukeshu.com/btrfs-progs-ng/lib/textui" ) @@ -33,6 +34,9 @@ type rebuiltTree struct { UUID btrfsprim.UUID Parent *rebuiltTree + // all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree + leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + // mutable Roots containers.Set[btrfsvol.LogicalAddr] Items containers.SortedMap[btrfsprim.Key, itemPtr] @@ -40,15 +44,15 @@ type rebuiltTree struct { // isOwnerOK returns whether it is permissible for a node with // .Head.Owner=owner to be in this tree. -func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { +func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { for { - if t == nil { + if tree == nil { return false } - if owner == t.ID { + if owner == tree.ID { return true } - t = t.Parent + tree = tree.Parent } } @@ -71,13 +75,19 @@ func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { // // - it does not keep track of errors encountered in a tree // +// Additionally, it provides a piece of functionality that +// btrfsutil.BrokenTrees does not: +// +// - it provides a .LeafToRoots() method to advise on what +// additional roots should be added +// // A zero RebuiltTrees is invalid; it must be initialized with // NewRebuiltTrees(). type RebuiltTrees struct { // static rawFile diskio.File[btrfsvol.LogicalAddr] sb btrfstree.Superblock - graph graph.Graph + graph pkggraph.Graph // static callbacks cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) @@ -92,7 +102,7 @@ type RebuiltTrees struct { // NewRebuiltTrees returns a new RebuiltTrees instance. All of the // callbacks must be non-nil. func NewRebuiltTrees( - file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph graph.Graph, + file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph pkggraph.Graph, cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool), cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), @@ -144,36 +154,20 @@ func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvo return ref } -func walk(graph graph.Graph, root btrfsvol.LogicalAddr, visited containers.Set[btrfsvol.LogicalAddr], fn func(btrfsvol.LogicalAddr) bool) { - if _, ok := graph.Nodes[root]; !ok { - return - } - if visited.Has(root) { - return - } - defer visited.Insert(root) - if !fn(root) { - return - } - for _, kp := range graph.EdgesFrom[root] { - walk(graph, kp.ToNode, visited, fn) - } -} - type rootStats struct { TreeID btrfsprim.ObjID RootNode btrfsvol.LogicalAddr - VisitedNodes int - TotalNodes int - AddedItems int + DoneLeafs int + TotalLeafs int + AddedItems int } func (s rootStats) String() string { return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)", s.TreeID, s.RootNode, - int(100*float64(s.VisitedNodes)/float64(s.TotalNodes)), - s.VisitedNodes, s.TotalNodes, + int(100*float64(s.DoneLeafs)/float64(s.TotalLeafs)), + s.DoneLeafs, s.TotalLeafs, s.AddedItems) } @@ -187,43 +181,40 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo tree := ts.trees[treeID] tree.Roots.Insert(rootNode) - visited := make(containers.Set[btrfsvol.LogicalAddr]) progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second) numAdded := 0 - progress := func() { + progress := func(done int) { progressWriter.Set(rootStats{ - TreeID: treeID, - RootNode: rootNode, - VisitedNodes: len(visited), - TotalNodes: len(ts.graph.Nodes), - AddedItems: numAdded, + TreeID: treeID, + RootNode: rootNode, + DoneLeafs: done, + TotalLeafs: len(tree.leafToRoots), + AddedItems: numAdded, }) } - progress() - walk(ts.graph, rootNode, visited, func(node btrfsvol.LogicalAddr) bool { - progress() - if !tree.isOwnerOK(ts.graph.Nodes[node].Owner) { - return false + for i, leaf := range maps.SortedKeys(tree.leafToRoots) { + progress(i) + roots := tree.leafToRoots[leaf] + if !roots.Has(rootNode) { + continue } - if ts.graph.Nodes[node].Level == 0 { - for i, item := range ts.readNode(node).Data.BodyLeaf { - if _, exists := tree.Items.Load(item.Key); exists { - // This is a panic because I'm not really sure what the best way to - // handle this is, and so if this happens I want the program to crash - // and force me to figure out how to handle it. - panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID)) - } - tree.Items.Store(item.Key, itemPtr{ - Node: node, - Idx: i, - }) - numAdded++ - ts.cbAddedItem(ctx, treeID, item.Key) + for j, item := range ts.readNode(leaf).Data.BodyLeaf { + if _, exists := tree.Items.Load(item.Key); exists { + // This is a panic because I'm not really sure what the best way to + // handle this is, and so if this happens I want the program to crash + // and force me to figure out how to handle it. + panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID)) } + tree.Items.Store(item.Key, itemPtr{ + Node: leaf, + Idx: j, + }) + numAdded++ + ts.cbAddedItem(ctx, treeID, item.Key) + progress(i) } - return true - }) - progress() + } + progress(len(tree.leafToRoots)) progressWriter.Done() } @@ -243,6 +234,7 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta if slices.Contains(treeID, stack) { return false } + tree := &rebuiltTree{ ID: treeID, Roots: make(containers.Set[btrfsvol.LogicalAddr]), @@ -282,13 +274,81 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta tree.Parent = ts.trees[parentID] } } + tree.indexLeafs(ctx, ts.graph) + ts.trees[treeID] = tree if root != 0 { ts.AddRoot(ctx, treeID, root) } + return true } +type indexStats struct { + TreeID btrfsprim.ObjID + DoneNodes int + TotalNodes int +} + +func (s indexStats) String() string { + return fmt.Sprintf("tree %v: indexing leaf nodes: %v%% (%v/%v)", + s.TreeID, + int(100*float64(s.DoneNodes)/float64(s.TotalNodes)), + s.DoneNodes, s.TotalNodes) +} + +func (tree *rebuiltTree) indexLeafs(ctx context.Context, graph pkggraph.Graph) { + nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + progressWriter := textui.NewProgress[indexStats](ctx, dlog.LogLevelInfo, 1*time.Second) + progress := func() { + progressWriter.Set(indexStats{ + TreeID: tree.ID, + DoneNodes: len(nodeToRoots), + TotalNodes: len(graph.Nodes), + }) + } + progress() + for _, node := range maps.SortedKeys(graph.Nodes) { + tree.indexNode(graph, node, nodeToRoots, progress, nil) + } + progressWriter.Done() + + tree.leafToRoots = make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + for node, roots := range nodeToRoots { + if graph.Nodes[node].Level == 0 && len(roots) > 0 { + tree.leafToRoots[node] = roots + } + } +} + +func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) { + defer progress() + if _, done := index[node]; done { + return + } + if slices.Contains(node, stack) { + return + } + if !tree.isOwnerOK(graph.Nodes[node].Owner) { + index[node] = nil + return + } + kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool { + return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner) + }) + if len(kps) == 0 { + index[node] = containers.NewSet[btrfsvol.LogicalAddr](node) + return + } + stack = append(stack, node) + roots := make(containers.Set[btrfsvol.LogicalAddr]) + for _, kp := range kps { + tree.indexNode(graph, kp.FromNode, index, progress, stack) + roots.InsertFrom(index[kp.FromNode]) + } + index[node] = roots +} + // Load reads an item from a tree. // // It is not nescessary to call AddTree for that tree first; Load will @@ -339,6 +399,27 @@ func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, f return ret } +// LeafToRoots returns the list of potential roots (to pass to +// .AddRoot) that include a given leaf-node. +// +// It is not nescessary to call AddTree for the tree first; +// LeafToRoots will call it for you. +func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] { + if !ts.AddTree(ctx, treeID) { + return nil + } + ret := make(containers.Set[btrfsvol.LogicalAddr]) + for root := range ts.trees[treeID].leafToRoots[leaf] { + if !ts.trees[treeID].Roots.Has(root) { + ret.Insert(root) + } + } + if len(ret) == 0 { + return nil + } + return ret +} + // ListRoots returns a listing of all initialized trees and their root // nodes. // -- cgit v1.2.3-2-g168b From 30be1eacbe4ff253c82c6e6163234dc20b4de550 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 21 Dec 2022 05:30:25 -0700 Subject: rebuildnodes: Refactor existing key-io code in to a sub-package --- .../rebuildnodes/btrees/rebuilt_btrees.go | 72 +++++----------------- 1 file changed, 16 insertions(+), 56 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 6e68a84..613f3ca 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -16,18 +16,13 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/diskio" "git.lukeshu.com/btrfs-progs-ng/lib/maps" "git.lukeshu.com/btrfs-progs-ng/lib/slices" "git.lukeshu.com/btrfs-progs-ng/lib/textui" ) -type itemPtr struct { - Node btrfsvol.LogicalAddr - Idx int -} - type rebuiltTree struct { // static ID btrfsprim.ObjID @@ -39,7 +34,7 @@ type rebuiltTree struct { // mutable Roots containers.Set[btrfsvol.LogicalAddr] - Items containers.SortedMap[btrfsprim.Key, itemPtr] + Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] } // isOwnerOK returns whether it is permissible for a node with @@ -85,9 +80,9 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { // NewRebuiltTrees(). type RebuiltTrees struct { // static - rawFile diskio.File[btrfsvol.LogicalAddr] - sb btrfstree.Superblock - graph pkggraph.Graph + sb btrfstree.Superblock + graph pkggraph.Graph + keyIO keyio.Handle // static callbacks cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) @@ -95,63 +90,28 @@ type RebuiltTrees struct { cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) // mutable - trees map[btrfsprim.ObjID]*rebuiltTree - nodeCache *containers.LRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]] + trees map[btrfsprim.ObjID]*rebuiltTree } // NewRebuiltTrees returns a new RebuiltTrees instance. All of the // callbacks must be non-nil. func NewRebuiltTrees( - file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph pkggraph.Graph, + sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle, cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool), cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), ) *RebuiltTrees { return &RebuiltTrees{ - rawFile: file, - sb: sb, - graph: graph, + sb: sb, + graph: graph, + keyIO: keyIO, cbAddedItem: cbAddedItem, cbLookupRoot: cbLookupRoot, cbLookupUUID: cbLookupUUID, - trees: make(map[btrfsprim.ObjID]*rebuiltTree), - nodeCache: containers.NewLRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]](8), - } -} - -func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] { - if cached, ok := ts.nodeCache.Get(laddr); ok { - return cached - } - - graphInfo, ok := ts.graph.Nodes[laddr] - if !ok { - panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr)) + trees: make(map[btrfsprim.ObjID]*rebuiltTree), } - - ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{ - LAddr: containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr}, - Level: containers.Optional[uint8]{OK: true, Val: graphInfo.Level}, - Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: graphInfo.Generation}, - Owner: func(treeID btrfsprim.ObjID) error { - if treeID != graphInfo.Owner { - return fmt.Errorf("expected owner=%v but claims to have owner=%v", - graphInfo.Owner, treeID) - } - return nil - }, - MinItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MinItem}, - MaxItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MaxItem}, - }) - if err != nil { - panic(fmt.Errorf("should not happen: i/o error: %w", err)) - } - - ts.nodeCache.Add(laddr, ref) - - return ref } type rootStats struct { @@ -198,14 +158,14 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo if !roots.Has(rootNode) { continue } - for j, item := range ts.readNode(leaf).Data.BodyLeaf { + for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf { if _, exists := tree.Items.Load(item.Key); exists { // This is a panic because I'm not really sure what the best way to // handle this is, and so if this happens I want the program to crash // and force me to figure out how to handle it. panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID)) } - tree.Items.Store(item.Key, itemPtr{ + tree.Items.Store(item.Key, keyio.ItemPtr{ Node: leaf, Idx: j, }) @@ -361,7 +321,7 @@ func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key bt if !ok { return nil, false } - return ts.readNode(ptr.Node).Data.BodyLeaf[ptr.Idx].Body, true + return ts.keyIO.ReadItem(ptr) } // Search searches for an item from a tree. @@ -372,7 +332,7 @@ func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn f if !ts.AddTree(ctx, treeID) { return btrfsprim.Key{}, false } - k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ itemPtr) int { + k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ keyio.ItemPtr) int { return fn(k) }) return k, ok @@ -386,7 +346,7 @@ func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, f if !ts.AddTree(ctx, treeID) { return nil } - kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ itemPtr) int { + kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ keyio.ItemPtr) int { return fn(k) }) if len(kvs) == 0 { -- cgit v1.2.3-2-g168b From 7620e1948a4d1e17458d8cfc9ed306bb774a3274 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 21 Dec 2022 04:30:56 -0700 Subject: rebuildnodes: Migrate to the new rebuilt-btrees system --- .../rebuildnodes/btrees/rebuilt_btrees.go | 29 +++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 613f3ca..23e974e 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -51,6 +51,21 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { } } +// cowDistance returns how many COW-snapshots down the 'tree' is from +// the 'parent'. +func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok bool) { + for { + if parentID == tree.ID { + return dist, true + } + if tree.Parent == nil { + return 0, false + } + tree = tree.Parent + dist++ + } +} + // RebuiltTrees is an abstraction for rebuilding and accessing // potentially broken btrees. // @@ -70,12 +85,15 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { // // - it does not keep track of errors encountered in a tree // -// Additionally, it provides a piece of functionality that +// Additionally, it provides some functionality that // btrfsutil.BrokenTrees does not: // // - it provides a .LeafToRoots() method to advise on what // additional roots should be added // +// - it provides a .COWDistance() method to compare how related two +// trees are +// // A zero RebuiltTrees is invalid; it must be initialized with // NewRebuiltTrees(). type RebuiltTrees struct { @@ -380,6 +398,15 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, return ret } +// COWDistance returns how many COW-snapshots down from the 'child' +// tree is from the 'parent' tree. +// +// It is invalid (panic) to call COWDistance for a tree without having +// called AddTree for the child first. +func (ts *RebuiltTrees) COWDistance(ctx context.Context, childID, parentID btrfsprim.ObjID) (dist int, ok bool) { + return ts.trees[childID].cowDistance(parentID) +} + // ListRoots returns a listing of all initialized trees and their root // nodes. // -- cgit v1.2.3-2-g168b From ec31481b46fb772dfe2e977ef37fc0ace39fad8a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 22 Dec 2022 16:30:27 -0700 Subject: rebuildnodes/btrees: Try to handle duplicate keys by generation --- .../rebuildnodes/btrees/rebuilt_btrees.go | 56 ++++++++++++++-------- 1 file changed, 37 insertions(+), 19 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 23e974e..cbe7df1 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -136,17 +136,18 @@ type rootStats struct { TreeID btrfsprim.ObjID RootNode btrfsvol.LogicalAddr - DoneLeafs int - TotalLeafs int - AddedItems int + DoneLeafs int + TotalLeafs int + AddedItems int + ReplacedItems int } func (s rootStats) String() string { - return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)", + return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items, replaced %v items)", s.TreeID, s.RootNode, int(100*float64(s.DoneLeafs)/float64(s.TotalLeafs)), s.DoneLeafs, s.TotalLeafs, - s.AddedItems) + s.AddedItems, s.ReplacedItems) } // AddRoot adds an additional root node to an existing tree. It is @@ -161,13 +162,15 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second) numAdded := 0 + numReplaced := 0 progress := func(done int) { progressWriter.Set(rootStats{ - TreeID: treeID, - RootNode: rootNode, - DoneLeafs: done, - TotalLeafs: len(tree.leafToRoots), - AddedItems: numAdded, + TreeID: treeID, + RootNode: rootNode, + DoneLeafs: done, + TotalLeafs: len(tree.leafToRoots), + AddedItems: numAdded, + ReplacedItems: numReplaced, }) } for i, leaf := range maps.SortedKeys(tree.leafToRoots) { @@ -177,17 +180,32 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo continue } for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf { - if _, exists := tree.Items.Load(item.Key); exists { - // This is a panic because I'm not really sure what the best way to - // handle this is, and so if this happens I want the program to crash - // and force me to figure out how to handle it. - panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID)) - } - tree.Items.Store(item.Key, keyio.ItemPtr{ + newPtr := keyio.ItemPtr{ Node: leaf, Idx: j, - }) - numAdded++ + } + if oldPtr, exists := tree.Items.Load(item.Key); !exists { + tree.Items.Store(item.Key, newPtr) + numAdded++ + } else { + oldGen := ts.graph.Nodes[oldPtr.Node].Generation + newGen := ts.graph.Nodes[newPtr.Node].Generation + switch { + case oldGen < newGen: + tree.Items.Store(item.Key, newPtr) + numReplaced++ + case oldGen > newGen: + // keep the old one + case oldGen == newGen: + // This is a panic because I'm not really sure what the best way to + // handle this is, and so if this happens I want the program to crash + // and force me to figure out how to handle it. + panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v", + item.Key, treeID, + oldPtr, ts.graph.Nodes[oldPtr.Node], + newPtr, ts.graph.Nodes[newPtr.Node])) + } + } ts.cbAddedItem(ctx, treeID, item.Key) progress(i) } -- cgit v1.2.3-2-g168b From 3e22a30a7febd9890163966d01dd2b34656c9e39 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 22 Dec 2022 18:24:03 -0700 Subject: rebuildnodes: Handle the same node being added twice --- .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index cbe7df1..a4ccd5e 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -34,6 +34,7 @@ type rebuiltTree struct { // mutable Roots containers.Set[btrfsvol.LogicalAddr] + Leafs containers.Set[btrfsvol.LogicalAddr] Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] } @@ -175,10 +176,10 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo } for i, leaf := range maps.SortedKeys(tree.leafToRoots) { progress(i) - roots := tree.leafToRoots[leaf] - if !roots.Has(rootNode) { + if tree.Leafs.Has(leaf) || !tree.leafToRoots[leaf].Has(rootNode) { continue } + tree.Leafs.Insert(leaf) for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf { newPtr := keyio.ItemPtr{ Node: leaf, @@ -234,6 +235,7 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta tree := &rebuiltTree{ ID: treeID, Roots: make(containers.Set[btrfsvol.LogicalAddr]), + Leafs: make(containers.Set[btrfsvol.LogicalAddr]), } var root btrfsvol.LogicalAddr switch treeID { @@ -404,11 +406,17 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, if !ts.AddTree(ctx, treeID) { return nil } + if ts.graph.Nodes[leaf].Level != 0 { + panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): not a leaf", + treeID, leaf)) + } ret := make(containers.Set[btrfsvol.LogicalAddr]) for root := range ts.trees[treeID].leafToRoots[leaf] { - if !ts.trees[treeID].Roots.Has(root) { - ret.Insert(root) + if ts.trees[treeID].Roots.Has(root) { + panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): tree contains root=%v but not leaf", + treeID, leaf, root)) } + ret.Insert(root) } if len(ret) == 0 { return nil -- cgit v1.2.3-2-g168b From fc6f8871c39d3a74153e4ce8fd440c256f65b650 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 23 Dec 2022 16:03:27 -0700 Subject: rebuildnodes/btrees: Track the tree's root item offset --- .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index a4ccd5e..3a6c35b 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -25,9 +25,10 @@ import ( type rebuiltTree struct { // static - ID btrfsprim.ObjID - UUID btrfsprim.UUID - Parent *rebuiltTree + ID btrfsprim.ObjID + UUID btrfsprim.UUID + Parent *rebuiltTree + ParentGen btrfsprim.Generation // offset of this tree's root item // all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] @@ -105,7 +106,7 @@ type RebuiltTrees struct { // static callbacks cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) - cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool) + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) // mutable @@ -117,7 +118,7 @@ type RebuiltTrees struct { func NewRebuiltTrees( sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle, cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), - cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool), + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool), cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), ) *RebuiltTrees { return &RebuiltTrees{ @@ -252,13 +253,14 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { return false } - rootItem, ok := ts.cbLookupRoot(ctx, treeID) + rootOff, rootItem, ok := ts.cbLookupRoot(ctx, treeID) if !ok { return false } root = rootItem.ByteNr tree.UUID = rootItem.UUID if rootItem.ParentUUID != (btrfsprim.UUID{}) { + tree.ParentGen = rootOff if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { return false } -- cgit v1.2.3-2-g168b From 51d6ccf1e5af4d88bfe6842b06fa1bc2d5cc732c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 23 Dec 2022 17:10:47 -0700 Subject: rebuildnodes: Have keyio.Handle always be a pointer, fuss with ScanDevices return type --- lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 3a6c35b..1d5ba14 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -102,7 +102,7 @@ type RebuiltTrees struct { // static sb btrfstree.Superblock graph pkggraph.Graph - keyIO keyio.Handle + keyIO *keyio.Handle // static callbacks cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) @@ -116,7 +116,7 @@ type RebuiltTrees struct { // NewRebuiltTrees returns a new RebuiltTrees instance. All of the // callbacks must be non-nil. func NewRebuiltTrees( - sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle, + sb btrfstree.Superblock, graph pkggraph.Graph, keyIO *keyio.Handle, cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool), cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), -- cgit v1.2.3-2-g168b From 8fc3c78924da1dedd3adce3b923adf58e5ca732a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 23 Dec 2022 17:50:46 -0700 Subject: rebuildnodes: Have the graph store keys; avoid I/O when indexing a tree --- .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index 1d5ba14..f919b37 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -181,20 +181,20 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo continue } tree.Leafs.Insert(leaf) - for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf { + for j, itemKey := range ts.graph.Nodes[leaf].Items { newPtr := keyio.ItemPtr{ Node: leaf, Idx: j, } - if oldPtr, exists := tree.Items.Load(item.Key); !exists { - tree.Items.Store(item.Key, newPtr) + if oldPtr, exists := tree.Items.Load(itemKey); !exists { + tree.Items.Store(itemKey, newPtr) numAdded++ } else { oldGen := ts.graph.Nodes[oldPtr.Node].Generation newGen := ts.graph.Nodes[newPtr.Node].Generation switch { case oldGen < newGen: - tree.Items.Store(item.Key, newPtr) + tree.Items.Store(itemKey, newPtr) numReplaced++ case oldGen > newGen: // keep the old one @@ -203,12 +203,12 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo // handle this is, and so if this happens I want the program to crash // and force me to figure out how to handle it. panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v", - item.Key, treeID, + itemKey, treeID, oldPtr, ts.graph.Nodes[oldPtr.Node], newPtr, ts.graph.Nodes[newPtr.Node])) } } - ts.cbAddedItem(ctx, treeID, item.Key) + ts.cbAddedItem(ctx, treeID, itemKey) progress(i) } } -- cgit v1.2.3-2-g168b From 6330b26a9f9c7769c48e2bc90e065457f8e138ab Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 23 Dec 2022 16:48:42 -0700 Subject: rebuildnodes: Have the key index belong to the btree, and be smarter --- .../rebuildnodes/btrees/rebuilt_btrees.go | 106 ++++++++++++++------- 1 file changed, 74 insertions(+), 32 deletions(-) (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go') diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go index f919b37..50c75a4 100644 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go @@ -32,6 +32,7 @@ type rebuiltTree struct { // all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + keys containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] // mutable Roots containers.Set[btrfsvol.LogicalAddr] @@ -41,14 +42,14 @@ type rebuiltTree struct { // isOwnerOK returns whether it is permissible for a node with // .Head.Owner=owner to be in this tree. -func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool { +func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool { for { - if tree == nil { - return false - } if owner == tree.ID { return true } + if tree.Parent == nil || gen >= tree.ParentGen { + return false + } tree = tree.Parent } } @@ -68,6 +69,38 @@ func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok boo } } +func (tree *rebuiltTree) shouldReplace(graph pkggraph.Graph, oldNode, newNode btrfsvol.LogicalAddr) bool { + oldDist, _ := tree.cowDistance(graph.Nodes[oldNode].Owner) + newDist, _ := tree.cowDistance(graph.Nodes[newNode].Owner) + switch { + case newDist < oldDist: + // Replace the old one with the new lower-dist one. + return true + case newDist > oldDist: + // Retain the old lower-dist one. + return false + default: + oldGen := graph.Nodes[oldNode].Generation + newGen := graph.Nodes[newNode].Generation + switch { + case newGen > oldGen: + // Replace the old one with the new higher-gen one. + return true + case newGen < oldGen: + // Retain the old higher-gen one. + return false + default: + // This is a panic because I'm not really sure what the best way to + // handle this is, and so if this happens I want the program to crash + // and force me to figure out how to handle it. + panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v", + tree.ID, + oldNode, graph.Nodes[oldNode], + newNode, graph.Nodes[newNode])) + } + } +} + // RebuiltTrees is an abstraction for rebuilding and accessing // potentially broken btrees. // @@ -189,24 +222,9 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo if oldPtr, exists := tree.Items.Load(itemKey); !exists { tree.Items.Store(itemKey, newPtr) numAdded++ - } else { - oldGen := ts.graph.Nodes[oldPtr.Node].Generation - newGen := ts.graph.Nodes[newPtr.Node].Generation - switch { - case oldGen < newGen: - tree.Items.Store(itemKey, newPtr) - numReplaced++ - case oldGen > newGen: - // keep the old one - case oldGen == newGen: - // This is a panic because I'm not really sure what the best way to - // handle this is, and so if this happens I want the program to crash - // and force me to figure out how to handle it. - panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v", - itemKey, treeID, - oldPtr, ts.graph.Nodes[oldPtr.Node], - newPtr, ts.graph.Nodes[newPtr.Node])) - } + } else if tree.shouldReplace(ts.graph, oldPtr.Node, newPtr.Node) { + tree.Items.Store(itemKey, newPtr) + numReplaced++ } ts.cbAddedItem(ctx, treeID, itemKey) progress(i) @@ -327,26 +345,42 @@ func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAd return } if slices.Contains(node, stack) { - return + panic("loop") } - if !tree.isOwnerOK(graph.Nodes[node].Owner) { + if !tree.isOwnerOK(graph.Nodes[node].Owner, graph.Nodes[node].Generation) { index[node] = nil return } + + // tree.leafToRoots + stack = append(stack, node) + var roots containers.Set[btrfsvol.LogicalAddr] kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool { - return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner) + return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner, graph.Nodes[kp.FromNode].Generation) }) - if len(kps) == 0 { - index[node] = containers.NewSet[btrfsvol.LogicalAddr](node) - return - } - stack = append(stack, node) - roots := make(containers.Set[btrfsvol.LogicalAddr]) for _, kp := range kps { tree.indexNode(graph, kp.FromNode, index, progress, stack) - roots.InsertFrom(index[kp.FromNode]) + if len(index[kp.FromNode]) > 0 { + if roots == nil { + roots = make(containers.Set[btrfsvol.LogicalAddr]) + } + roots.InsertFrom(index[kp.FromNode]) + } + } + if roots == nil { + roots = containers.NewSet[btrfsvol.LogicalAddr](node) } index[node] = roots + + // tree.keys + for i, key := range graph.Nodes[node].Items { + if oldPtr, ok := tree.keys.Load(key); !ok || tree.shouldReplace(graph, oldPtr.Node, node) { + tree.keys.Store(key, keyio.ItemPtr{ + Node: node, + Idx: i, + }) + } + } } // Load reads an item from a tree. @@ -426,6 +460,14 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, return ret } +// Keys returns a map of all keys in node that would be valid in this tree. +// +// It is invalid (panic) to call Keys for a tree without having called +// AddTree first. +func (ts *RebuiltTrees) Keys(treeID btrfsprim.ObjID) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { + return &ts.trees[treeID].keys +} + // COWDistance returns how many COW-snapshots down from the 'child' // tree is from the 'parent' tree. // -- cgit v1.2.3-2-g168b