diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-05 19:48:27 -0700 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2023-01-05 19:48:27 -0700 |
commit | cd01485ec126c103fa9ab718685d184b70d0e1ff (patch) | |
tree | 0c003455bf0795a04e4fb198d2e85e2c7b63ffb3 /lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees | |
parent | f416f777b2c2cac095e851e6799020f91e34aed1 (diff) | |
parent | 3b385f26973e45b4c2e2f3ebf9d52ab0131cff5e (diff) |
Merge branch 'lukeshu/rebuild-nodes-take4'
Diffstat (limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')
3 files changed, 554 insertions, 471 deletions
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/forrest.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/forrest.go new file mode 100644 index 0000000..ff6b1c5 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/forrest.go @@ -0,0 +1,193 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrees + +import ( + "context" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +// RebuiltForrest is an abstraction for rebuilding and accessing +// potentially broken btrees. +// +// It is conceptually a btrfstree.TreeOperator, and adds similar +// broken-tree handling to btrfsutil.BrokenForrest. However, the API +// is different thant btrfstree.TreeOperator, and is much more +// efficient than btrfsutil.BrokenForrest. +// +// The efficiency improvements are possible because of the API +// differences, which are necessary for how it is used in +// rebuildnodes: +// +// - it consumes an already-read graph.Graph instead of reading the +// graph itself +// +// - it does not use `btrfstree.TreePath` +// +// - it does not keep track of errors encountered in a tree +// +// Additionally, it provides some functionality that +// btrfsutil.BrokenForrest does not: +// +// - it provides a .LeafToRoots() method to advise on what +// additional roots should be added +// +// - it provides a .COWDistance() method to compare how related two +// trees are +// +// A zero RebuiltForrest is invalid; it must be initialized with +// NewRebuiltForrest(). +type RebuiltForrest struct { + // static + sb btrfstree.Superblock + graph pkggraph.Graph + keyIO *keyio.Handle + + // static callbacks + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) + + // mutable + trees containers.SyncMap[btrfsprim.ObjID, *RebuiltTree] + leafs *containers.LRUCache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]] + incItems *containers.LRUCache[btrfsprim.ObjID, *itemIndex] + excItems *containers.LRUCache[btrfsprim.ObjID, *itemIndex] +} + +// NewRebuiltForrest returns a new RebuiltForrest instance. All of +// the callbacks must be non-nil. +func NewRebuiltForrest( + sb btrfstree.Superblock, graph pkggraph.Graph, keyIO *keyio.Handle, + cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), + cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool), + cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), +) *RebuiltForrest { + return &RebuiltForrest{ + sb: sb, + graph: graph, + keyIO: keyIO, + + cbAddedItem: cbAddedItem, + cbLookupRoot: cbLookupRoot, + cbLookupUUID: cbLookupUUID, + + leafs: containers.NewLRUCache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]](textui.Tunable(8)), + incItems: containers.NewLRUCache[btrfsprim.ObjID, *itemIndex](textui.Tunable(8)), + excItems: containers.NewLRUCache[btrfsprim.ObjID, *itemIndex](textui.Tunable(8)), + } +} + +// Tree returns a given tree, initializing it if nescessary. If it is +// unable to initialize the tree, then nil is returned, and nothing is +// done to the forrest. +// +// The tree is initialized with the normal root node of the tree. +func (ts *RebuiltForrest) Tree(ctx context.Context, treeID btrfsprim.ObjID) *RebuiltTree { + if !ts.addTree(ctx, treeID, nil) { + return nil + } + tree, _ := ts.trees.Load(treeID) + return tree +} + +func (ts *RebuiltForrest) addTree(ctx context.Context, treeID btrfsprim.ObjID, stack []btrfsprim.ObjID) (ok bool) { + if tree, ok := ts.trees.Load(treeID); ok { + return tree != nil + } + defer func() { + if !ok { + // Store a negative cache of this. tree.AddRoot() for the ROOT or UUID + // trees will invalidate the negative cache. + ts.trees.Store(treeID, nil) + } + }() + stack = append(stack, treeID) + ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.add-tree", stack) + dlog.Info(ctx, "adding tree...") + if slices.Contains(treeID, stack[:len(stack)-1]) { + dlog.Errorf(ctx, "failed to add tree: loop detected: %v", stack) + return false + } + + tree := &RebuiltTree{ + ID: treeID, + Roots: make(containers.Set[btrfsvol.LogicalAddr]), + forrest: ts, + } + var root btrfsvol.LogicalAddr + switch treeID { + case btrfsprim.ROOT_TREE_OBJECTID: + root = ts.sb.RootTree + case btrfsprim.CHUNK_TREE_OBJECTID: + root = ts.sb.ChunkTree + case btrfsprim.TREE_LOG_OBJECTID: + root = ts.sb.LogTree + case btrfsprim.BLOCK_GROUP_TREE_OBJECTID: + root = ts.sb.BlockGroupRoot + default: + if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { + dlog.Error(ctx, "failed to add tree: add ROOT_TREE") + return false + } + rootOff, rootItem, ok := ts.cbLookupRoot(ctx, treeID) + if !ok { + dlog.Error(ctx, "failed to add tree: lookup ROOT_ITEM") + return false + } + root = rootItem.ByteNr + tree.UUID = rootItem.UUID + if rootItem.ParentUUID != (btrfsprim.UUID{}) { + tree.ParentGen = rootOff + if !ts.addTree(ctx, btrfsprim.UUID_TREE_OBJECTID, stack) { + return false + } + parentID, ok := ts.cbLookupUUID(ctx, rootItem.ParentUUID) + if !ok { + dlog.Error(ctx, "failed to add tree: lookup UUID") + return false + } + if !ts.addTree(ctx, parentID, stack) { + dlog.Error(ctx, "failed to add tree: add parent tree") + return false + } + tree.Parent, _ = ts.trees.Load(parentID) + } + } + + ts.trees.Store(treeID, tree) + if root != 0 { + tree.AddRoot(ctx, root) + } + + return true +} + +// ListRoots returns a listing of all initialized trees and their root +// nodes. +// +// Do not mutate the set of roots for a tree; it is a pointer to the +// RebuiltForrest's internal set! +func (ts *RebuiltForrest) ListRoots() map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] { + ret := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr]) + ts.trees.Range(func(treeID btrfsprim.ObjID, tree *RebuiltTree) bool { + if tree != nil { + ret[treeID] = tree.Roots + } + return true + }) + return ret +} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go deleted file mode 100644 index b53a28e..0000000 --- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package btrees - -import ( - "context" - "fmt" - "time" - - "github.com/datawire/dlib/dlog" - - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" - pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" - "git.lukeshu.com/btrfs-progs-ng/lib/containers" - "git.lukeshu.com/btrfs-progs-ng/lib/maps" - "git.lukeshu.com/btrfs-progs-ng/lib/slices" - "git.lukeshu.com/btrfs-progs-ng/lib/textui" -) - -type rebuiltTree struct { - // static - ID btrfsprim.ObjID - UUID btrfsprim.UUID - Parent *rebuiltTree - ParentGen btrfsprim.Generation // offset of this tree's root item - - // all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree - leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] - keys containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] - - // mutable - Roots containers.Set[btrfsvol.LogicalAddr] - Leafs containers.Set[btrfsvol.LogicalAddr] - Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] -} - -// isOwnerOK returns whether it is permissible for a node with -// .Head.Owner=owner to be in this tree. -func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool { - for { - if owner == tree.ID { - return true - } - if tree.Parent == nil || gen >= tree.ParentGen { - return false - } - tree = tree.Parent - } -} - -// cowDistance returns how many COW-snapshots down the 'tree' is from -// the 'parent'. -func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok bool) { - for { - if parentID == tree.ID { - return dist, true - } - if tree.Parent == nil { - return 0, false - } - tree = tree.Parent - dist++ - } -} - -func (tree *rebuiltTree) shouldReplace(graph pkggraph.Graph, oldNode, newNode btrfsvol.LogicalAddr) bool { - oldDist, _ := tree.cowDistance(graph.Nodes[oldNode].Owner) - newDist, _ := tree.cowDistance(graph.Nodes[newNode].Owner) - switch { - case newDist < oldDist: - // Replace the old one with the new lower-dist one. - return true - case newDist > oldDist: - // Retain the old lower-dist one. - return false - default: - oldGen := graph.Nodes[oldNode].Generation - newGen := graph.Nodes[newNode].Generation - switch { - case newGen > oldGen: - // Replace the old one with the new higher-gen one. - return true - case newGen < oldGen: - // Retain the old higher-gen one. - return false - default: - // This is a panic because I'm not really sure what the best way to - // handle this is, and so if this happens I want the program to crash - // and force me to figure out how to handle it. - panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v", - tree.ID, - oldNode, graph.Nodes[oldNode], - newNode, graph.Nodes[newNode])) - } - } -} - -// RebuiltTrees is an abstraction for rebuilding and accessing -// potentially broken btrees. -// -// It is conceptually a btrfstree.TreeOperator, and adds similar -// broken-tree handling to btrfsutil.BrokenTrees. However, the API is -// different thant btrfstree.TreeOperator, and is much more efficient -// than btrfsutil.BrokenTrees. -// -// The efficiency improvements are possible because of the API -// differences, which are necessary for how it is used in -// rebuildnodes: -// -// - it consumes an already-read graph.Graph instead of reading the -// graph itself -// -// - it does not use `btrfstree.TreePath` -// -// - it does not keep track of errors encountered in a tree -// -// Additionally, it provides some functionality that -// btrfsutil.BrokenTrees does not: -// -// - it provides a .LeafToRoots() method to advise on what -// additional roots should be added -// -// - it provides a .COWDistance() method to compare how related two -// trees are -// -// A zero RebuiltTrees is invalid; it must be initialized with -// NewRebuiltTrees(). -type RebuiltTrees struct { - // static - sb btrfstree.Superblock - graph pkggraph.Graph - keyIO *keyio.Handle - - // static callbacks - cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) - cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) - cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) - - // mutable - trees map[btrfsprim.ObjID]*rebuiltTree -} - -// NewRebuiltTrees returns a new RebuiltTrees instance. All of the -// callbacks must be non-nil. -func NewRebuiltTrees( - sb btrfstree.Superblock, graph pkggraph.Graph, keyIO *keyio.Handle, - cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key), - cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool), - cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool), -) *RebuiltTrees { - return &RebuiltTrees{ - sb: sb, - graph: graph, - keyIO: keyIO, - - cbAddedItem: cbAddedItem, - cbLookupRoot: cbLookupRoot, - cbLookupUUID: cbLookupUUID, - - trees: make(map[btrfsprim.ObjID]*rebuiltTree), - } -} - -type rootStats struct { - Leafs textui.Portion[int] - AddedItems int - ReplacedItems int -} - -func (s rootStats) String() string { - return textui.Sprintf("%v (added %v items, replaced %v items)", - s.Leafs, s.AddedItems, s.ReplacedItems) -} - -// AddRoot adds an additional root node to an existing tree. It is -// useful to call .AddRoot() to re-attach part of the tree that has -// been broken off. -// -// It is invalid (panic) to call AddRoot for a tree without having -// called AddTree first. -func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, rootNode btrfsvol.LogicalAddr) { - ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.add-root", fmt.Sprintf("tree=%v rootNode=%v", treeID, rootNode)) - tree := ts.trees[treeID] - tree.Roots.Insert(rootNode) - - var stats rootStats - stats.Leafs.D = len(tree.leafToRoots) - progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) - for i, leaf := range maps.SortedKeys(tree.leafToRoots) { - stats.Leafs.N = i - progressWriter.Set(stats) - if tree.Leafs.Has(leaf) || !tree.leafToRoots[leaf].Has(rootNode) { - continue - } - tree.Leafs.Insert(leaf) - for j, itemKey := range ts.graph.Nodes[leaf].Items { - newPtr := keyio.ItemPtr{ - Node: leaf, - Idx: j, - } - if oldPtr, exists := tree.Items.Load(itemKey); !exists { - tree.Items.Store(itemKey, newPtr) - stats.AddedItems++ - } else if tree.shouldReplace(ts.graph, oldPtr.Node, newPtr.Node) { - tree.Items.Store(itemKey, newPtr) - stats.ReplacedItems++ - } - ts.cbAddedItem(ctx, treeID, itemKey) - progressWriter.Set(stats) - } - } - stats.Leafs.N = len(tree.leafToRoots) - progressWriter.Set(stats) - progressWriter.Done() -} - -// AddTree initializes the given tree, returning true if it was able -// to do so, or false if there was a problem and nothing was done. -// The tree is initialized with the normal root node of the tree. -// -// Subsequent calls to AddTree for the same tree are no-ops. -func (ts *RebuiltTrees) AddTree(ctx context.Context, treeID btrfsprim.ObjID) (ok bool) { - return ts.addTree(ctx, treeID, nil) -} - -func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, stack []btrfsprim.ObjID) (ok bool) { - if _, ok := ts.trees[treeID]; ok { - return true - } - if slices.Contains(treeID, stack) { - return false - } - stack = append(stack, treeID) - ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.add-tree", stack) - dlog.Info(ctx, "adding tree...") - - tree := &rebuiltTree{ - ID: treeID, - Roots: make(containers.Set[btrfsvol.LogicalAddr]), - Leafs: make(containers.Set[btrfsvol.LogicalAddr]), - } - var root btrfsvol.LogicalAddr - switch treeID { - case btrfsprim.ROOT_TREE_OBJECTID: - root = ts.sb.RootTree - case btrfsprim.CHUNK_TREE_OBJECTID: - root = ts.sb.ChunkTree - case btrfsprim.TREE_LOG_OBJECTID: - root = ts.sb.LogTree - case btrfsprim.BLOCK_GROUP_TREE_OBJECTID: - root = ts.sb.BlockGroupRoot - default: - if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { - return false - } - rootOff, rootItem, ok := ts.cbLookupRoot(ctx, treeID) - if !ok { - return false - } - root = rootItem.ByteNr - tree.UUID = rootItem.UUID - if rootItem.ParentUUID != (btrfsprim.UUID{}) { - tree.ParentGen = rootOff - if !ts.addTree(ctx, btrfsprim.UUID_TREE_OBJECTID, stack) { - return false - } - parentID, ok := ts.cbLookupUUID(ctx, rootItem.ParentUUID) - if !ok { - return false - } - if !ts.addTree(ctx, parentID, append(stack, treeID)) { - return false - } - tree.Parent = ts.trees[parentID] - } - } - tree.indexLeafs(ctx, ts.graph) - - ts.trees[treeID] = tree - if root != 0 { - ts.AddRoot(ctx, treeID, root) - } - - return true -} - -func (tree *rebuiltTree) indexLeafs(ctx context.Context, graph pkggraph.Graph) { - ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.add-tree.substep", "index-nodes") - - nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) - - var stats textui.Portion[int] - stats.D = len(graph.Nodes) - progressWriter := textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) - progress := func() { - stats.N = len(nodeToRoots) - progressWriter.Set(stats) - } - - progress() - for _, node := range maps.SortedKeys(graph.Nodes) { - tree.indexNode(graph, node, nodeToRoots, progress, nil) - } - progressWriter.Done() - - tree.leafToRoots = make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) - for node, roots := range nodeToRoots { - if graph.Nodes[node].Level == 0 && len(roots) > 0 { - tree.leafToRoots[node] = roots - } - } -} - -func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) { - defer progress() - if _, done := index[node]; done { - return - } - if slices.Contains(node, stack) { - // This is a panic because graph.FinalCheck() should - // have already checked for loops. - panic("loop") - } - if !tree.isOwnerOK(graph.Nodes[node].Owner, graph.Nodes[node].Generation) { - index[node] = nil - return - } - - // tree.leafToRoots - stack = append(stack, node) - var roots containers.Set[btrfsvol.LogicalAddr] - kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool { - return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner, graph.Nodes[kp.FromNode].Generation) - }) - for _, kp := range kps { - tree.indexNode(graph, kp.FromNode, index, progress, stack) - if len(index[kp.FromNode]) > 0 { - if roots == nil { - roots = make(containers.Set[btrfsvol.LogicalAddr]) - } - roots.InsertFrom(index[kp.FromNode]) - } - } - if roots == nil { - roots = containers.NewSet[btrfsvol.LogicalAddr](node) - } - index[node] = roots - - // tree.keys - for i, key := range graph.Nodes[node].Items { - if oldPtr, ok := tree.keys.Load(key); !ok || tree.shouldReplace(graph, oldPtr.Node, node) { - tree.keys.Store(key, keyio.ItemPtr{ - Node: node, - Idx: i, - }) - } - } -} - -// Load reads an item from a tree. -// -// It is not nescessary to call AddTree for that tree first; Load will -// call it for you. -func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key btrfsprim.Key) (item btrfsitem.Item, ok bool) { - if !ts.AddTree(ctx, treeID) { - return nil, false - } - ptr, ok := ts.trees[treeID].Items.Load(key) - if !ok { - return nil, false - } - return ts.keyIO.ReadItem(ctx, ptr) -} - -// Search searches for an item from a tree. -// -// It is not nescessary to call AddTree for that tree first; Search -// will call it for you. -func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) (key btrfsprim.Key, ok bool) { - if !ts.AddTree(ctx, treeID) { - return btrfsprim.Key{}, false - } - k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ keyio.ItemPtr) int { - return fn(k) - }) - return k, ok -} - -// Search searches for a range of items from a tree. -// -// It is not nescessary to call AddTree for that tree first; SearchAll -// will call it for you. -func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) []btrfsprim.Key { - if !ts.AddTree(ctx, treeID) { - return nil - } - kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ keyio.ItemPtr) int { - return fn(k) - }) - if len(kvs) == 0 { - return nil - } - ret := make([]btrfsprim.Key, len(kvs)) - for i := range kvs { - ret[i] = kvs[i].K - } - return ret -} - -// LeafToRoots returns the list of potential roots (to pass to -// .AddRoot) that include a given leaf-node. -// -// It is not nescessary to call AddTree for the tree first; -// LeafToRoots will call it for you. -func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] { - if !ts.AddTree(ctx, treeID) { - return nil - } - if ts.graph.Nodes[leaf].Level != 0 { - panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): not a leaf", - treeID, leaf)) - } - ret := make(containers.Set[btrfsvol.LogicalAddr]) - for root := range ts.trees[treeID].leafToRoots[leaf] { - if ts.trees[treeID].Roots.Has(root) { - panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): tree contains root=%v but not leaf", - treeID, leaf, root)) - } - ret.Insert(root) - } - if len(ret) == 0 { - return nil - } - return ret -} - -// Keys returns a map of all keys in node that would be valid in this tree. -// -// It is invalid (panic) to call Keys for a tree without having called -// AddTree first. -func (ts *RebuiltTrees) Keys(treeID btrfsprim.ObjID) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { - return &ts.trees[treeID].keys -} - -// COWDistance returns how many COW-snapshots down from the 'child' -// tree is from the 'parent' tree. -// -// It is invalid (panic) to call COWDistance for a tree without having -// called AddTree for the child first. -func (ts *RebuiltTrees) COWDistance(ctx context.Context, childID, parentID btrfsprim.ObjID) (dist int, ok bool) { - return ts.trees[childID].cowDistance(parentID) -} - -// ListRoots returns a listing of all initialized trees and their root -// nodes. -// -// Do not mutate the set of roots for a tree; it is a pointer to the -// RebuiltTrees' internal set! -func (ts *RebuiltTrees) ListRoots() map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] { - ret := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], len(ts.trees)) - for treeID := range ts.trees { - ret[treeID] = ts.trees[treeID].Roots - } - return ret -} diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/tree.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/tree.go new file mode 100644 index 0000000..c381274 --- /dev/null +++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/tree.go @@ -0,0 +1,361 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrees + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +type RebuiltTree struct { + // static + ID btrfsprim.ObjID + UUID btrfsprim.UUID + Parent *RebuiltTree + ParentGen btrfsprim.Generation // offset of this tree's root item + forrest *RebuiltForrest + + // mutable + mu sync.RWMutex + Roots containers.Set[btrfsvol.LogicalAddr] + // There are 3 more mutable "members" that are protected by + // `mu`; but they live in a shared LRUcache. They are all + // derived from tree.Roots, which is why it's OK if they get + // evicted. + // + // 1. tree.leafToRoots() = tree.forrest.leafs.Load(tree.ID) + // 2. tree.Items() = tree.forrest.incItems.Load(tree.ID) + // 3. tree.PotentialItems() = tree.forrest.excItems.Load(tree.ID) +} + +// LRU member 1: .leafToRoots() //////////////////////////////////////////////////////////////////////////////////////// + +// leafToRoots returns all leafs (lvl=0) in the filesystem that pass +// .isOwnerOK, whether or not they're in the tree. +func (tree *RebuiltTree) leafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { + return tree.forrest.leafs.GetOrElse(tree.ID, func() map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { + ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.index-nodes", fmt.Sprintf("tree=%v", tree.ID)) + + nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + + var stats textui.Portion[int] + stats.D = len(tree.forrest.graph.Nodes) + progressWriter := textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + progress := func() { + stats.N = len(nodeToRoots) + progressWriter.Set(stats) + } + + progress() + for _, node := range maps.SortedKeys(tree.forrest.graph.Nodes) { + tree.indexNode(ctx, node, nodeToRoots, progress, nil) + } + progressWriter.Done() + + ret := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + for node, roots := range nodeToRoots { + if tree.forrest.graph.Nodes[node].Level == 0 && len(roots) > 0 { + ret[node] = roots + } + } + return ret + }) +} + +func (tree *RebuiltTree) indexNode(ctx context.Context, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) { + defer progress() + if err := ctx.Err(); err != nil { + return + } + if _, done := index[node]; done { + return + } + if slices.Contains(node, stack) { + // This is a panic because tree.forrest.graph.FinalCheck() should + // have already checked for loops. + panic("loop") + } + if !tree.isOwnerOK(tree.forrest.graph.Nodes[node].Owner, tree.forrest.graph.Nodes[node].Generation) { + index[node] = nil + return + } + + // tree.leafToRoots + stack = append(stack, node) + var roots containers.Set[btrfsvol.LogicalAddr] + kps := slices.RemoveAllFunc(tree.forrest.graph.EdgesTo[node], func(kp *pkggraph.Edge) bool { + return !tree.isOwnerOK(tree.forrest.graph.Nodes[kp.FromNode].Owner, tree.forrest.graph.Nodes[kp.FromNode].Generation) + }) + for _, kp := range kps { + tree.indexNode(ctx, kp.FromNode, index, progress, stack) + if len(index[kp.FromNode]) > 0 { + if roots == nil { + roots = make(containers.Set[btrfsvol.LogicalAddr]) + } + roots.InsertFrom(index[kp.FromNode]) + } + } + if roots == nil { + roots = containers.NewSet[btrfsvol.LogicalAddr](node) + } + index[node] = roots +} + +// isOwnerOK returns whether it is permissible for a node with +// .Head.Owner=owner to be in this tree. +func (tree *RebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool { + for { + if owner == tree.ID { + return true + } + if tree.Parent == nil || gen >= tree.ParentGen { + return false + } + tree = tree.Parent + } +} + +// LRU members 2 and 3: .Items() and .PotentialItems() ///////////////////////////////////////////////////////////////// + +// Items returns a map of the items contained in this tree. +// +// Do not mutate the returned map; it is a pointer to the +// RebuiltTree's internal map! +func (tree *RebuiltTree) Items(ctx context.Context) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { + ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.index-inc-items", fmt.Sprintf("tree=%v", tree.ID)) + return tree.items(ctx, tree.forrest.incItems, tree.Roots.HasAny) +} + +// PotentialItems returns a map of items that could be added to this +// tree with .AddRoot(). +// +// Do not mutate the returned map; it is a pointer to the +// RebuiltTree's internal map! +func (tree *RebuiltTree) PotentialItems(ctx context.Context) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { + ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.index-all-items", fmt.Sprintf("tree=%v", tree.ID)) + return tree.items(ctx, tree.forrest.excItems, + func(roots containers.Set[btrfsvol.LogicalAddr]) bool { + return !tree.Roots.HasAny(roots) + }) +} + +type itemIndex = containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] + +type itemStats struct { + Leafs textui.Portion[int] + NumItems int + NumDups int +} + +func (s itemStats) String() string { + return textui.Sprintf("%v (%v items, %v dups)", + s.Leafs, s.NumItems, s.NumDups) +} + +func (tree *RebuiltTree) items(ctx context.Context, cache *containers.LRUCache[btrfsprim.ObjID, *itemIndex], + leafFn func(roots containers.Set[btrfsvol.LogicalAddr]) bool, +) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] { + tree.mu.RLock() + defer tree.mu.RUnlock() + + return cache.GetOrElse(tree.ID, func() *itemIndex { + var leafs []btrfsvol.LogicalAddr + for leaf, roots := range tree.leafToRoots(ctx) { + if leafFn(roots) { + leafs = append(leafs, leaf) + } + } + slices.Sort(leafs) + + var stats itemStats + stats.Leafs.D = len(leafs) + progressWriter := textui.NewProgress[itemStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + + index := new(containers.SortedMap[btrfsprim.Key, keyio.ItemPtr]) + for i, leaf := range leafs { + stats.Leafs.N = i + progressWriter.Set(stats) + for j, itemKey := range tree.forrest.graph.Nodes[leaf].Items { + newPtr := keyio.ItemPtr{ + Node: leaf, + Idx: j, + } + if oldPtr, exists := index.Load(itemKey); !exists { + index.Store(itemKey, newPtr) + stats.NumItems++ + } else { + if tree.shouldReplace(oldPtr.Node, newPtr.Node) { + index.Store(itemKey, newPtr) + } + stats.NumDups++ + } + progressWriter.Set(stats) + } + } + if stats.Leafs.N > 0 { + stats.Leafs.N = len(leafs) + progressWriter.Set(stats) + progressWriter.Done() + } + + return index + }) +} + +func (tree *RebuiltTree) shouldReplace(oldNode, newNode btrfsvol.LogicalAddr) bool { + oldDist, _ := tree.COWDistance(tree.forrest.graph.Nodes[oldNode].Owner) + newDist, _ := tree.COWDistance(tree.forrest.graph.Nodes[newNode].Owner) + switch { + case newDist < oldDist: + // Replace the old one with the new lower-dist one. + return true + case newDist > oldDist: + // Retain the old lower-dist one. + return false + default: + oldGen := tree.forrest.graph.Nodes[oldNode].Generation + newGen := tree.forrest.graph.Nodes[newNode].Generation + switch { + case newGen > oldGen: + // Replace the old one with the new higher-gen one. + return true + case newGen < oldGen: + // Retain the old higher-gen one. + return false + default: + // TODO: This is a panic because I'm not really sure what the + // best way to handle this is, and so if this happens I want the + // program to crash and force me to figure out how to handle it. + panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v", + tree.ID, + oldNode, tree.forrest.graph.Nodes[oldNode], + newNode, tree.forrest.graph.Nodes[newNode])) + } + } +} + +// .AddRoot() ////////////////////////////////////////////////////////////////////////////////////////////////////////// + +type rootStats struct { + Leafs textui.Portion[int] + AddedLeafs int + AddedItems int +} + +func (s rootStats) String() string { + return textui.Sprintf("%v (added %v leafs, added %v items)", + s.Leafs, s.AddedLeafs, s.AddedItems) +} + +// AddRoot adds an additional root node to the tree. It is useful to +// call .AddRoot() to re-attach part of the tree that has been broken +// off. +func (tree *RebuiltTree) AddRoot(ctx context.Context, rootNode btrfsvol.LogicalAddr) { + tree.mu.Lock() + defer tree.mu.Unlock() + ctx = dlog.WithField(ctx, "btrfsinspect.rebuild-nodes.rebuild.add-root", fmt.Sprintf("tree=%v rootNode=%v", tree.ID, rootNode)) + + leafToRoots := tree.leafToRoots(ctx) + + var stats rootStats + stats.Leafs.D = len(leafToRoots) + progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + for i, leaf := range maps.SortedKeys(leafToRoots) { + stats.Leafs.N = i + progressWriter.Set(stats) + + if tree.Roots.HasAny(leafToRoots[leaf]) || !leafToRoots[leaf].Has(rootNode) { + continue + } + + stats.AddedLeafs++ + progressWriter.Set(stats) + + for _, itemKey := range tree.forrest.graph.Nodes[leaf].Items { + tree.forrest.cbAddedItem(ctx, tree.ID, itemKey) + stats.AddedItems++ + progressWriter.Set(stats) + } + } + stats.Leafs.N = len(leafToRoots) + progressWriter.Set(stats) + progressWriter.Done() + + tree.Roots.Insert(rootNode) + tree.forrest.incItems.Remove(tree.ID) // force re-gen + tree.forrest.excItems.Remove(tree.ID) // force re-gen + + if (tree.ID == btrfsprim.ROOT_TREE_OBJECTID || tree.ID == btrfsprim.UUID_TREE_OBJECTID) && stats.AddedItems > 0 { + tree.forrest.trees.Range(func(otherTreeID btrfsprim.ObjID, otherTree *RebuiltTree) bool { + if otherTree == nil { + tree.forrest.trees.Delete(otherTreeID) + } + return true + }) + } +} + +// main public API ///////////////////////////////////////////////////////////////////////////////////////////////////// + +// COWDistance returns how many COW-snapshots down the 'tree' is from +// the 'parent'. +func (tree *RebuiltTree) COWDistance(parentID btrfsprim.ObjID) (dist int, ok bool) { + for { + if parentID == tree.ID { + return dist, true + } + if tree.Parent == nil { + return 0, false + } + tree = tree.Parent + dist++ + } +} + +// ReadItem reads an item from a tree. +func (tree *RebuiltTree) ReadItem(ctx context.Context, key btrfsprim.Key) (item btrfsitem.Item, ok bool) { + ptr, ok := tree.Items(ctx).Load(key) + if !ok { + return nil, false + } + return tree.forrest.keyIO.ReadItem(ctx, ptr) +} + +// LeafToRoots returns the list of potential roots (to pass to +// .AddRoot) that include a given leaf-node. +func (tree *RebuiltTree) LeafToRoots(ctx context.Context, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] { + if tree.forrest.graph.Nodes[leaf].Level != 0 { + panic(fmt.Errorf("should not happen: (tree=%v).LeafToRoots(leaf=%v): not a leaf", + tree.ID, leaf)) + } + tree.mu.RLock() + defer tree.mu.RUnlock() + ret := make(containers.Set[btrfsvol.LogicalAddr]) + for root := range tree.leafToRoots(ctx)[leaf] { + if tree.Roots.Has(root) { + panic(fmt.Errorf("should not happen: (tree=%v).LeafToRoots(leaf=%v): tree contains root=%v but not leaf", + tree.ID, leaf, root)) + } + ret.Insert(root) + } + if len(ret) == 0 { + return nil + } + return ret +} |