From 05456e5ffe328c975529645f8e1588ef89f49da0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Wed, 5 Apr 2023 11:18:11 -0600 Subject: btrfsutil: RebuiltForrest: Simplify by taking a btrfs.ReadableFS --- cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go | 2 +- cmd/btrfs-rec/inspect/rebuildtrees/scan.go | 4 ---- lib/btrfsutil/rebuilt_forrest.go | 22 ++++++++++++---------- lib/btrfsutil/rebuilt_readitem.go | 4 ++-- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go index e6345ae..4787dbf 100644 --- a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go +++ b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go @@ -85,7 +85,7 @@ func NewRebuilder(ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.Logical o := &rebuilder{ scan: scanData, } - o.rebuilt = btrfsutil.NewRebuiltForrest(fs, scanData.Superblock, scanData.Graph, forrestCallbacks{o}) + o.rebuilt = btrfsutil.NewRebuiltForrest(fs, scanData.Graph, forrestCallbacks{o}) return o, nil } diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go index b1469ff..7518896 100644 --- a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go +++ b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go @@ -32,8 +32,6 @@ type FlagsAndErr struct { } type ScanDevicesResult struct { - Superblock btrfstree.Superblock - Graph btrfsutil.Graph Flags map[btrfsutil.ItemPtr]FlagsAndErr // INODE_ITEM @@ -53,8 +51,6 @@ func ScanDevices(_ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.Logical // read-roots ////////////////////////////////////////////////////////////////// ctx = dlog.WithField(_ctx, "btrfs.inspect.rebuild-trees.read.substep", "read-roots") ret := ScanDevicesResult{ - Superblock: *sb, - Graph: btrfsutil.NewGraph(ctx, *sb), Flags: make(map[btrfsutil.ItemPtr]FlagsAndErr), Names: make(map[btrfsutil.ItemPtr][]byte), diff --git a/lib/btrfsutil/rebuilt_forrest.go b/lib/btrfsutil/rebuilt_forrest.go index 6231563..99213f1 100644 --- a/lib/btrfsutil/rebuilt_forrest.go +++ b/lib/btrfsutil/rebuilt_forrest.go @@ -10,9 +10,9 @@ import ( "github.com/datawire/dlib/dlog" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/slices" @@ -134,8 +134,7 @@ func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfs // NewRebuiltForrest(). type RebuiltForrest struct { // static - file btrfstree.NodeSource - sb btrfstree.Superblock + inner btrfs.ReadableFS graph Graph cb RebuiltForrestCallbacks @@ -149,10 +148,9 @@ type RebuiltForrest struct { // NewRebuiltForrest returns a new RebuiltForrest instance. The // RebuiltForrestCallbacks may be nil. 
-func NewRebuiltForrest(file btrfstree.NodeSource, sb btrfstree.Superblock, graph Graph, cb RebuiltForrestCallbacks) *RebuiltForrest { +func NewRebuiltForrest(fs btrfs.ReadableFS, graph Graph, cb RebuiltForrestCallbacks) *RebuiltForrest { ret := &RebuiltForrest{ - file: file, - sb: sb, + inner: fs, graph: graph, cb: cb, @@ -213,13 +211,17 @@ func (ts *RebuiltForrest) addTree(ctx context.Context, treeID btrfsprim.ObjID, s var root btrfsvol.LogicalAddr switch treeID { case btrfsprim.ROOT_TREE_OBJECTID: - root = ts.sb.RootTree + sb, _ := ts.inner.Superblock() + root = sb.RootTree case btrfsprim.CHUNK_TREE_OBJECTID: - root = ts.sb.ChunkTree + sb, _ := ts.inner.Superblock() + root = sb.ChunkTree case btrfsprim.TREE_LOG_OBJECTID: - root = ts.sb.LogTree + sb, _ := ts.inner.Superblock() + root = sb.LogTree case btrfsprim.BLOCK_GROUP_TREE_OBJECTID: - root = ts.sb.BlockGroupRoot + sb, _ := ts.inner.Superblock() + root = sb.BlockGroupRoot default: if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) { dlog.Error(ctx, "failed to add tree: add ROOT_TREE") diff --git a/lib/btrfsutil/rebuilt_readitem.go b/lib/btrfsutil/rebuilt_readitem.go index 80a9e4b..d1d1319 100644 --- a/lib/btrfsutil/rebuilt_readitem.go +++ b/lib/btrfsutil/rebuilt_readitem.go @@ -36,7 +36,7 @@ func (ts *RebuiltForrest) readItem(ctx context.Context, ptr ItemPtr) btrfsitem.I panic(fmt.Errorf("should not happen: btrfsutil.RebuiltForrest.readItem called for negative item slot: %v", ptr.Slot)) } - node, err := ts.file.AcquireNode(ctx, ptr.Node, btrfstree.NodeExpectations{ + node, err := ts.inner.AcquireNode(ctx, ptr.Node, btrfstree.NodeExpectations{ LAddr: containers.OptionalValue(ptr.Node), Level: containers.OptionalValue(graphInfo.Level), Generation: containers.OptionalValue(graphInfo.Generation), @@ -51,7 +51,7 @@ func (ts *RebuiltForrest) readItem(ctx context.Context, ptr ItemPtr) btrfsitem.I MinItem: containers.OptionalValue(graphInfo.MinItem(ts.graph)), MaxItem: containers.OptionalValue(graphInfo.MaxItem(ts.graph)), }) - defer ts.file.ReleaseNode(node) + defer ts.inner.ReleaseNode(node) if err != nil { panic(fmt.Errorf("should not happen: i/o error: %w", err)) } -- cgit v1.1-4-g5e80 From c88331692ea676acf67eb489a600688cf0c62fc6 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 7 Apr 2023 13:43:52 -0600 Subject: btrfsutil: Split rebuilt_callbacks.go off from rebuilt_forrest.go --- lib/btrfsutil/rebuilt_callbacks.go | 85 ++++++++++++++++++++++++++++++++++++++ lib/btrfsutil/rebuilt_forrest.go | 73 -------------------------------- 2 files changed, 85 insertions(+), 73 deletions(-) create mode 100644 lib/btrfsutil/rebuilt_callbacks.go diff --git a/lib/btrfsutil/rebuilt_callbacks.go b/lib/btrfsutil/rebuilt_callbacks.go new file mode 100644 index 0000000..fef16d4 --- /dev/null +++ b/lib/btrfsutil/rebuilt_callbacks.go @@ -0,0 +1,85 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package btrfsutil + +import ( + "context" + "fmt" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" +) + +type RebuiltForrestCallbacks interface { + AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) + AddedRoot(ctx context.Context, tree btrfsprim.ObjID, root btrfsvol.LogicalAddr) + LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) + LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok 
bool) +} + +type noopRebuiltForrestCallbacks struct { + forrest *RebuiltForrest +} + +func (noopRebuiltForrestCallbacks) AddedItem(context.Context, btrfsprim.ObjID, btrfsprim.Key) {} +func (noopRebuiltForrestCallbacks) AddedRoot(context.Context, btrfsprim.ObjID, btrfsvol.LogicalAddr) { +} + +func (cb noopRebuiltForrestCallbacks) LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, _item btrfsitem.Root, ok bool) { + rootTree := cb.forrest.RebuiltTree(ctx, btrfsprim.ROOT_TREE_OBJECTID) + if rootTree == nil { + return 0, btrfsitem.Root{}, false + } + tgt := btrfsprim.Key{ + ObjectID: tree, + ItemType: btrfsprim.ROOT_ITEM_KEY, + } + itemKey, itemPtr, ok := rootTree.RebuiltAcquireItems(ctx).Search(func(key btrfsprim.Key, _ ItemPtr) int { + key.Offset = 0 + return tgt.Compare(key) + }) + rootTree.RebuiltReleaseItems() + if !ok { + return 0, btrfsitem.Root{}, false + } + itemBody := cb.forrest.readItem(ctx, itemPtr) + defer itemBody.Free() + switch itemBody := itemBody.(type) { + case *btrfsitem.Root: + return btrfsprim.Generation(itemKey.Offset), *itemBody, true + case *btrfsitem.Error: + return 0, btrfsitem.Root{}, false + default: + // This is a panic because the item decoder should not emit ROOT_ITEM items as anything but + // btrfsitem.Root or btrfsitem.Error without this code also being updated. + panic(fmt.Errorf("should not happen: ROOT_ITEM item has unexpected type: %T", itemBody)) + } +} + +func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) { + uuidTree := cb.forrest.RebuiltTree(ctx, btrfsprim.UUID_TREE_OBJECTID) + if uuidTree == nil { + return 0, false + } + tgt := btrfsitem.UUIDToKey(uuid) + itemPtr, ok := uuidTree.RebuiltAcquireItems(ctx).Load(tgt) + uuidTree.RebuiltReleaseItems() + if !ok { + return 0, false + } + itemBody := cb.forrest.readItem(ctx, itemPtr) + defer itemBody.Free() + switch itemBody := itemBody.(type) { + case *btrfsitem.UUIDMap: + return itemBody.ObjID, true + case *btrfsitem.Error: + return 0, false + default: + // This is a panic because the item decoder should not emit UUID_SUBVOL items as anything but + // btrfsitem.UUIDMap or btrfsitem.Error without this code also being updated. 
+ panic(fmt.Errorf("should not happen: UUID_SUBVOL item has unexpected type: %T", itemBody)) + } +} diff --git a/lib/btrfsutil/rebuilt_forrest.go b/lib/btrfsutil/rebuilt_forrest.go index 99213f1..29a5210 100644 --- a/lib/btrfsutil/rebuilt_forrest.go +++ b/lib/btrfsutil/rebuilt_forrest.go @@ -6,89 +6,16 @@ package btrfsutil import ( "context" - "fmt" "github.com/datawire/dlib/dlog" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" - "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" "git.lukeshu.com/btrfs-progs-ng/lib/containers" "git.lukeshu.com/btrfs-progs-ng/lib/slices" ) -type RebuiltForrestCallbacks interface { - AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) - AddedRoot(ctx context.Context, tree btrfsprim.ObjID, root btrfsvol.LogicalAddr) - LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) - LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) -} - -type noopRebuiltForrestCallbacks struct { - forrest *RebuiltForrest -} - -func (noopRebuiltForrestCallbacks) AddedItem(context.Context, btrfsprim.ObjID, btrfsprim.Key) {} -func (noopRebuiltForrestCallbacks) AddedRoot(context.Context, btrfsprim.ObjID, btrfsvol.LogicalAddr) { -} - -func (cb noopRebuiltForrestCallbacks) LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, _item btrfsitem.Root, ok bool) { - rootTree := cb.forrest.RebuiltTree(ctx, btrfsprim.ROOT_TREE_OBJECTID) - if rootTree == nil { - return 0, btrfsitem.Root{}, false - } - tgt := btrfsprim.Key{ - ObjectID: tree, - ItemType: btrfsprim.ROOT_ITEM_KEY, - } - itemKey, itemPtr, ok := rootTree.RebuiltAcquireItems(ctx).Search(func(key btrfsprim.Key, _ ItemPtr) int { - key.Offset = 0 - return tgt.Compare(key) - }) - rootTree.RebuiltReleaseItems() - if !ok { - return 0, btrfsitem.Root{}, false - } - itemBody := cb.forrest.readItem(ctx, itemPtr) - defer itemBody.Free() - switch itemBody := itemBody.(type) { - case *btrfsitem.Root: - return btrfsprim.Generation(itemKey.Offset), *itemBody, true - case *btrfsitem.Error: - return 0, btrfsitem.Root{}, false - default: - // This is a panic because the item decoder should not emit ROOT_ITEM items as anything but - // btrfsitem.Root or btrfsitem.Error without this code also being updated. - panic(fmt.Errorf("should not happen: ROOT_ITEM item has unexpected type: %T", itemBody)) - } -} - -func (cb noopRebuiltForrestCallbacks) LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) { - uuidTree := cb.forrest.RebuiltTree(ctx, btrfsprim.UUID_TREE_OBJECTID) - if uuidTree == nil { - return 0, false - } - tgt := btrfsitem.UUIDToKey(uuid) - itemPtr, ok := uuidTree.RebuiltAcquireItems(ctx).Load(tgt) - uuidTree.RebuiltReleaseItems() - if !ok { - return 0, false - } - itemBody := cb.forrest.readItem(ctx, itemPtr) - defer itemBody.Free() - switch itemBody := itemBody.(type) { - case *btrfsitem.UUIDMap: - return itemBody.ObjID, true - case *btrfsitem.Error: - return 0, false - default: - // This is a panic because the item decoder should not emit UUID_SUBVOL items as anything but - // btrfsitem.UUIDMap or btrfsitem.Error without this code also being updated. - panic(fmt.Errorf("should not happen: UUID_SUBVOL item has unexpected type: %T", itemBody)) - } -} - // RebuiltForrest is an abstraction for rebuilding and accessing // potentially broken btrees. 
// -- cgit v1.1-4-g5e80 From 01d7bcfc588c5551e8ac387f62d2fc4468ba1f4c Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 7 Apr 2023 13:52:35 -0600 Subject: btrfsutil: RebuiltForrest: Allow skipping the .AddedItem loop --- .../inspect/rebuildtrees/rebuild_treecb.go | 5 ++- lib/btrfsutil/rebuilt_callbacks.go | 7 +++- lib/btrfsutil/rebuilt_forrest.go | 7 +++- lib/btrfsutil/rebuilt_tree.go | 48 +++++++++++++--------- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go index 92b5ee5..0c52556 100644 --- a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go +++ b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild_treecb.go @@ -11,13 +11,16 @@ import ( "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsutil" ) type forrestCallbacks struct { *rebuilder } -// AddedItem implements btrfsutil.RebuiltForrestCallbacks. +var _ btrfsutil.RebuiltForrestExtendedCallbacks = forrestCallbacks{} + +// AddedItem implements btrfsutil.RebuiltForrestExtendedCallbacks. func (o forrestCallbacks) AddedItem(_ context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) { o.addedItemQueue.Insert(keyAndTree{ TreeID: tree, diff --git a/lib/btrfsutil/rebuilt_callbacks.go b/lib/btrfsutil/rebuilt_callbacks.go index fef16d4..3a7e6e6 100644 --- a/lib/btrfsutil/rebuilt_callbacks.go +++ b/lib/btrfsutil/rebuilt_callbacks.go @@ -14,17 +14,20 @@ import ( ) type RebuiltForrestCallbacks interface { - AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) AddedRoot(ctx context.Context, tree btrfsprim.ObjID, root btrfsvol.LogicalAddr) LookupRoot(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool) LookupUUID(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool) } +type RebuiltForrestExtendedCallbacks interface { + RebuiltForrestCallbacks + AddedItem(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key) +} + type noopRebuiltForrestCallbacks struct { forrest *RebuiltForrest } -func (noopRebuiltForrestCallbacks) AddedItem(context.Context, btrfsprim.ObjID, btrfsprim.Key) {} func (noopRebuiltForrestCallbacks) AddedRoot(context.Context, btrfsprim.ObjID, btrfsvol.LogicalAddr) { } diff --git a/lib/btrfsutil/rebuilt_forrest.go b/lib/btrfsutil/rebuilt_forrest.go index 29a5210..4249396 100644 --- a/lib/btrfsutil/rebuilt_forrest.go +++ b/lib/btrfsutil/rebuilt_forrest.go @@ -73,8 +73,11 @@ type RebuiltForrest struct { rebuiltSharedCache } -// NewRebuiltForrest returns a new RebuiltForrest instance. The -// RebuiltForrestCallbacks may be nil. +// NewRebuiltForrest returns a new RebuiltForrest instance. +// +// The `cb` RebuiltForrestCallbacks may be nil. If `cb` also +// implements RebuiltForrestExtendedCallbacks, then a series of +// .AddedItem() calls will be made before each call to .AddedRoot(). 
func NewRebuiltForrest(fs btrfs.ReadableFS, graph Graph, cb RebuiltForrestCallbacks) *RebuiltForrest { ret := &RebuiltForrest{ inner: fs, diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go index 31a31be..e4d5ddd 100644 --- a/lib/btrfsutil/rebuilt_tree.go +++ b/lib/btrfsutil/rebuilt_tree.go @@ -320,37 +320,45 @@ func (tree *RebuiltTree) RebuiltAddRoot(ctx context.Context, rootNode btrfsvol.L ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.add-root", fmt.Sprintf("tree=%v rootNode=%v", tree.ID, rootNode)) dlog.Info(ctx, "adding root...") - var stats rebuiltRootStats - leafToRoots := tree.acquireLeafToRoots(ctx) - stats.Leafs.D = len(leafToRoots) - progressWriter := textui.NewProgress[rebuiltRootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) - for i, leaf := range maps.SortedKeys(leafToRoots) { - stats.Leafs.N = i - progressWriter.Set(stats) + shouldFlush := tree.ID == btrfsprim.ROOT_TREE_OBJECTID || tree.ID == btrfsprim.UUID_TREE_OBJECTID + + if extCB, ok := tree.forrest.cb.(RebuiltForrestExtendedCallbacks); ok { + var stats rebuiltRootStats + leafToRoots := tree.acquireLeafToRoots(ctx) + stats.Leafs.D = len(leafToRoots) + progressWriter := textui.NewProgress[rebuiltRootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + for i, leaf := range maps.SortedKeys(leafToRoots) { + stats.Leafs.N = i + progressWriter.Set(stats) - if tree.Roots.HasAny(leafToRoots[leaf]) || !leafToRoots[leaf].Has(rootNode) { - continue - } + if tree.Roots.HasAny(leafToRoots[leaf]) || !leafToRoots[leaf].Has(rootNode) { + continue + } + + stats.AddedLeafs++ + progressWriter.Set(stats) - stats.AddedLeafs++ + for _, itemKey := range tree.forrest.graph.Nodes[leaf].Items { + extCB.AddedItem(ctx, tree.ID, itemKey) + stats.AddedItems++ + progressWriter.Set(stats) + } + } + stats.Leafs.N = len(leafToRoots) + tree.releaseLeafToRoots() progressWriter.Set(stats) + progressWriter.Done() - for _, itemKey := range tree.forrest.graph.Nodes[leaf].Items { - tree.forrest.cb.AddedItem(ctx, tree.ID, itemKey) - stats.AddedItems++ - progressWriter.Set(stats) + if stats.AddedItems == 0 { + shouldFlush = false } } - stats.Leafs.N = len(leafToRoots) - tree.releaseLeafToRoots() - progressWriter.Set(stats) - progressWriter.Done() tree.Roots.Insert(rootNode) tree.forrest.incItems.Delete(tree.ID) // force re-gen tree.forrest.excItems.Delete(tree.ID) // force re-gen - if (tree.ID == btrfsprim.ROOT_TREE_OBJECTID || tree.ID == btrfsprim.UUID_TREE_OBJECTID) && stats.AddedItems > 0 { + if shouldFlush { tree.forrest.flushNegativeCache(ctx) } tree.forrest.cb.AddedRoot(ctx, tree.ID, rootNode) -- cgit v1.1-4-g5e80 From e190c4b1318229ce6c1a074a541ee4fb94228726 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 7 Apr 2023 19:24:21 -0600 Subject: btrfsutil: RebuiltTree: Refactor .uncachedLeafToRoots() --- lib/btrfsutil/rebuilt_tree.go | 73 ++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go index e4d5ddd..cbd884c 100644 --- a/lib/btrfsutil/rebuilt_tree.go +++ b/lib/btrfsutil/rebuilt_tree.go @@ -86,24 +86,14 @@ func (tree *RebuiltTree) releaseLeafToRoots() { func (tree *RebuiltTree) uncachedLeafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.index-nodes", fmt.Sprintf("tree=%v", tree.ID)) - nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + indexer := 
&rebuiltNodeIndexer{ + tree: tree, - var stats textui.Portion[int] - stats.D = len(tree.forrest.graph.Nodes) - progressWriter := textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) - progress := func() { - stats.N = len(nodeToRoots) - progressWriter.Set(stats) - } - - progress() - for _, node := range maps.SortedKeys(tree.forrest.graph.Nodes) { - tree.indexNode(ctx, node, nodeToRoots, progress, nil) + nodeToRoots: make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]), } - progressWriter.Done() ret := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) - for node, roots := range nodeToRoots { + for node, roots := range indexer.run(ctx) { if tree.forrest.graph.Nodes[node].Level == 0 && len(roots) > 0 { ret[node] = roots } @@ -111,12 +101,40 @@ func (tree *RebuiltTree) uncachedLeafToRoots(ctx context.Context) map[btrfsvol.L return ret } -func (tree *RebuiltTree) indexNode(ctx context.Context, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) { - defer progress() +type rebuiltNodeIndexer struct { + // Input + tree *RebuiltTree + + // Output + nodeToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + + // State + stats textui.Portion[int] + progressWriter *textui.Progress[textui.Portion[int]] +} + +func (indexer *rebuiltNodeIndexer) run(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { + indexer.stats.D = len(indexer.tree.forrest.graph.Nodes) + indexer.progressWriter = textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + indexer.updateProgress() + for _, node := range maps.SortedKeys(indexer.tree.forrest.graph.Nodes) { + indexer.node(ctx, node, nil) + } + indexer.progressWriter.Done() + return indexer.nodeToRoots +} + +func (indexer *rebuiltNodeIndexer) updateProgress() { + indexer.stats.N = len(indexer.nodeToRoots) + indexer.progressWriter.Set(indexer.stats) +} + +func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.LogicalAddr, stack []btrfsvol.LogicalAddr) { + defer indexer.updateProgress() if err := ctx.Err(); err != nil { return } - if maps.HasKey(index, node) { + if maps.HasKey(indexer.nodeToRoots, node) { return } if slices.Contains(node, stack) { @@ -124,35 +142,38 @@ func (tree *RebuiltTree) indexNode(ctx context.Context, node btrfsvol.LogicalAdd // have already checked for loops. 
panic("loop") } - if !tree.isOwnerOK(tree.forrest.graph.Nodes[node].Owner, tree.forrest.graph.Nodes[node].Generation) { - index[node] = nil + if !indexer.tree.isOwnerOK(indexer.tree.forrest.graph.Nodes[node].Owner, indexer.tree.forrest.graph.Nodes[node].Generation) { + indexer.nodeToRoots[node] = nil return } - // tree.leafToRoots stack = append(stack, node) var roots containers.Set[btrfsvol.LogicalAddr] - for _, kp := range tree.forrest.graph.EdgesTo[node] { - if !tree.isOwnerOK(tree.forrest.graph.Nodes[kp.FromNode].Owner, tree.forrest.graph.Nodes[kp.FromNode].Generation) { + for _, kp := range indexer.tree.forrest.graph.EdgesTo[node] { + if !indexer.tree.isOwnerOK(indexer.tree.forrest.graph.Nodes[kp.FromNode].Owner, indexer.tree.forrest.graph.Nodes[kp.FromNode].Generation) { continue } - tree.indexNode(ctx, kp.FromNode, index, progress, stack) - if len(index[kp.FromNode]) > 0 { + indexer.node(ctx, kp.FromNode, stack) + if len(indexer.nodeToRoots[kp.FromNode]) > 0 { if roots == nil { roots = make(containers.Set[btrfsvol.LogicalAddr]) } - roots.InsertFrom(index[kp.FromNode]) + roots.InsertFrom(indexer.nodeToRoots[kp.FromNode]) } } if roots == nil { roots = containers.NewSet[btrfsvol.LogicalAddr](node) } - index[node] = roots + indexer.nodeToRoots[node] = roots } // isOwnerOK returns whether it is permissible for a node with // .Head.Owner=owner and .Head.Generation=gen to be in this tree. func (tree *RebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool { + // It is important that this not perform allocations, even in + // the "false"/failure case. It will be called lots of times + // in a tight loop for both values that pass and values that + // fail. for { if owner == tree.ID { return true -- cgit v1.1-4-g5e80 From b64f5a9be215f5831e3948c051f6d36189c9fcb9 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 9 Apr 2023 16:23:49 -0600 Subject: btrfsutil: RebuiltTree: Wrap leafToRoots in a struct The implication being that I plan on adding more members to the struct. --- lib/btrfsutil/rebuilt_tree.go | 54 ++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go index cbd884c..198bd85 100644 --- a/lib/btrfsutil/rebuilt_tree.go +++ b/lib/btrfsutil/rebuilt_tree.go @@ -37,24 +37,24 @@ type RebuiltTree struct { // derived from tree.Roots, which is why it's OK if they get // evicted. // - // 1. tree.acquireLeafToRoots() = tree.forrest.leafs.Acquire(tree.ID) + // 1. tree.acquireNodeIndex() = tree.forrest.nodeIndex.Acquire(tree.ID) // 2. tree.RebuiltAcquireItems() = tree.forrest.incItems.Acquire(tree.ID) // 3. 
tree.RebuiltAcquirePotentialItems() = tree.forrest.excItems.Acquire(tree.ID) } type rebuiltSharedCache struct { - leafs containers.Cache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]] - incItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]] - excItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]] + nodeIndex containers.Cache[btrfsprim.ObjID, rebuiltNodeIndex] + incItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]] + excItems containers.Cache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]] } func makeRebuiltSharedCache(forrest *RebuiltForrest) rebuiltSharedCache { var ret rebuiltSharedCache - ret.leafs = containers.NewARCache[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]]( + ret.nodeIndex = containers.NewARCache[btrfsprim.ObjID, rebuiltNodeIndex]( textui.Tunable(8), - containers.SourceFunc[btrfsprim.ObjID, map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]]( - func(ctx context.Context, treeID btrfsprim.ObjID, leafs *map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) { - *leafs = forrest.trees[treeID].uncachedLeafToRoots(ctx) + containers.SourceFunc[btrfsprim.ObjID, rebuiltNodeIndex]( + func(ctx context.Context, treeID btrfsprim.ObjID, index *rebuiltNodeIndex) { + *index = forrest.trees[treeID].uncachedNodeIndex(ctx) })) ret.incItems = containers.NewARCache[btrfsprim.ObjID, containers.SortedMap[btrfsprim.Key, ItemPtr]]( textui.Tunable(8), @@ -71,19 +71,23 @@ func makeRebuiltSharedCache(forrest *RebuiltForrest) rebuiltSharedCache { return ret } -// evictable member 1: .acquireLeafToRoots() /////////////////////////////////////////////////////////////////////////// +// evictable member 1: .acquireNodeIndex() ///////////////////////////////////////////////////////////////////////////// -// acquireLeafToRoots returns all leafs (lvl=0) in the filesystem that -// pass .isOwnerOK, whether or not they're in the tree. -func (tree *RebuiltTree) acquireLeafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { - return *tree.forrest.leafs.Acquire(ctx, tree.ID) +type rebuiltNodeIndex struct { + // leafToRoots contains all leafs (lvl=0) in the filesystem + // that pass .isOwnerOK, whether or not they're in the tree. 
+ leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] } -func (tree *RebuiltTree) releaseLeafToRoots() { - tree.forrest.leafs.Release(tree.ID) +func (tree *RebuiltTree) acquireNodeIndex(ctx context.Context) rebuiltNodeIndex { + return *tree.forrest.nodeIndex.Acquire(ctx, tree.ID) } -func (tree *RebuiltTree) uncachedLeafToRoots(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { +func (tree *RebuiltTree) releaseNodeIndex() { + tree.forrest.nodeIndex.Release(tree.ID) +} + +func (tree *RebuiltTree) uncachedNodeIndex(ctx context.Context) rebuiltNodeIndex { ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.index-nodes", fmt.Sprintf("tree=%v", tree.ID)) indexer := &rebuiltNodeIndexer{ @@ -92,10 +96,12 @@ func (tree *RebuiltTree) uncachedLeafToRoots(ctx context.Context) map[btrfsvol.L nodeToRoots: make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]), } - ret := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]) + ret := rebuiltNodeIndex{ + leafToRoots: make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]), + } for node, roots := range indexer.run(ctx) { if tree.forrest.graph.Nodes[node].Level == 0 && len(roots) > 0 { - ret[node] = roots + ret.leafToRoots[node] = roots } } return ret @@ -247,12 +253,12 @@ func (tree *RebuiltTree) items(ctx context.Context, inc bool) containers.SortedM defer tree.mu.RUnlock() var leafs []btrfsvol.LogicalAddr - for leaf, roots := range tree.acquireLeafToRoots(ctx) { + for leaf, roots := range tree.acquireNodeIndex(ctx).leafToRoots { if tree.Roots.HasAny(roots) == inc { leafs = append(leafs, leaf) } } - tree.releaseLeafToRoots() + tree.releaseNodeIndex() slices.Sort(leafs) var stats rebuiltItemStats @@ -345,7 +351,7 @@ func (tree *RebuiltTree) RebuiltAddRoot(ctx context.Context, rootNode btrfsvol.L if extCB, ok := tree.forrest.cb.(RebuiltForrestExtendedCallbacks); ok { var stats rebuiltRootStats - leafToRoots := tree.acquireLeafToRoots(ctx) + leafToRoots := tree.acquireNodeIndex(ctx).leafToRoots stats.Leafs.D = len(leafToRoots) progressWriter := textui.NewProgress[rebuiltRootStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) for i, leaf := range maps.SortedKeys(leafToRoots) { @@ -366,7 +372,7 @@ func (tree *RebuiltTree) RebuiltAddRoot(ctx context.Context, rootNode btrfsvol.L } } stats.Leafs.N = len(leafToRoots) - tree.releaseLeafToRoots() + tree.releaseNodeIndex() progressWriter.Set(stats) progressWriter.Done() @@ -420,14 +426,14 @@ func (tree *RebuiltTree) RebuiltLeafToRoots(ctx context.Context, leaf btrfsvol.L tree.mu.RLock() defer tree.mu.RUnlock() ret := make(containers.Set[btrfsvol.LogicalAddr]) - for root := range tree.acquireLeafToRoots(ctx)[leaf] { + for root := range tree.acquireNodeIndex(ctx).leafToRoots[leaf] { if tree.Roots.Has(root) { panic(fmt.Errorf("should not happen: (tree=%v).RebuiltLeafToRoots(leaf=%v): tree contains root=%v but not leaf", tree.ID, leaf, root)) } ret.Insert(root) } - tree.releaseLeafToRoots() + tree.releaseNodeIndex() if len(ret) == 0 { return nil } -- cgit v1.1-4-g5e80 From 8f1c5ed840bf701e69f8644f157c5edd66095103 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 31 Mar 2023 18:18:14 -0600 Subject: btrfsutil: RebuiltTree: Track the max-key for each path to a node --- lib/btrfsutil/rebuilt_tree.go | 47 +++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go index 198bd85..c47c930 100644 --- 
a/lib/btrfsutil/rebuilt_tree.go +++ b/lib/btrfsutil/rebuilt_tree.go @@ -73,10 +73,17 @@ func makeRebuiltSharedCache(forrest *RebuiltForrest) rebuiltSharedCache { // evictable member 1: .acquireNodeIndex() ///////////////////////////////////////////////////////////////////////////// +type rebuiltRoots = map[btrfsvol.LogicalAddr]rebuiltPathInfo + +type rebuiltPathInfo struct { + loMaxItem btrfsprim.Key + hiMaxItem btrfsprim.Key +} + type rebuiltNodeIndex struct { // leafToRoots contains all leafs (lvl=0) in the filesystem // that pass .isOwnerOK, whether or not they're in the tree. - leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + leafToRoots map[btrfsvol.LogicalAddr]rebuiltRoots } func (tree *RebuiltTree) acquireNodeIndex(ctx context.Context) rebuiltNodeIndex { @@ -93,11 +100,11 @@ func (tree *RebuiltTree) uncachedNodeIndex(ctx context.Context) rebuiltNodeIndex indexer := &rebuiltNodeIndexer{ tree: tree, - nodeToRoots: make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]), + nodeToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots), } ret := rebuiltNodeIndex{ - leafToRoots: make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]), + leafToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots), } for node, roots := range indexer.run(ctx) { if tree.forrest.graph.Nodes[node].Level == 0 && len(roots) > 0 { @@ -112,14 +119,14 @@ type rebuiltNodeIndexer struct { tree *RebuiltTree // Output - nodeToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] + nodeToRoots map[btrfsvol.LogicalAddr]rebuiltRoots // State stats textui.Portion[int] progressWriter *textui.Progress[textui.Portion[int]] } -func (indexer *rebuiltNodeIndexer) run(ctx context.Context) map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr] { +func (indexer *rebuiltNodeIndexer) run(ctx context.Context) map[btrfsvol.LogicalAddr]rebuiltRoots { indexer.stats.D = len(indexer.tree.forrest.graph.Nodes) indexer.progressWriter = textui.NewProgress[textui.Portion[int]](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) indexer.updateProgress() @@ -154,7 +161,7 @@ func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.Logic } stack = append(stack, node) - var roots containers.Set[btrfsvol.LogicalAddr] + var roots rebuiltRoots for _, kp := range indexer.tree.forrest.graph.EdgesTo[node] { if !indexer.tree.isOwnerOK(indexer.tree.forrest.graph.Nodes[kp.FromNode].Owner, indexer.tree.forrest.graph.Nodes[kp.FromNode].Generation) { continue @@ -162,13 +169,31 @@ func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.Logic indexer.node(ctx, kp.FromNode, stack) if len(indexer.nodeToRoots[kp.FromNode]) > 0 { if roots == nil { - roots = make(containers.Set[btrfsvol.LogicalAddr]) + roots = make(rebuiltRoots) + } + for root, rootInfo := range indexer.nodeToRoots[kp.FromNode] { + if kp.FromSlot+1 < len(indexer.tree.forrest.graph.EdgesFrom[kp.FromNode]) { + rootInfo.loMaxItem = indexer.tree.forrest.graph.EdgesFrom[kp.FromNode][kp.FromSlot+1].ToKey.Mm() + rootInfo.hiMaxItem = rootInfo.loMaxItem + } + oldRootInfo, ok := roots[root] + if ok && rootInfo.loMaxItem.Compare(oldRootInfo.loMaxItem) > 0 { + rootInfo.loMaxItem = oldRootInfo.loMaxItem + } + if ok && rootInfo.hiMaxItem.Compare(oldRootInfo.hiMaxItem) < 0 { + rootInfo.hiMaxItem = oldRootInfo.hiMaxItem + } + roots[root] = rootInfo } - roots.InsertFrom(indexer.nodeToRoots[kp.FromNode]) } } if roots == nil { - roots = containers.NewSet[btrfsvol.LogicalAddr](node) + roots = rebuiltRoots{ + node: 
rebuiltPathInfo{ + loMaxItem: btrfsprim.MaxKey, + hiMaxItem: btrfsprim.MaxKey, + }, + } } indexer.nodeToRoots[node] = roots } @@ -254,7 +279,7 @@ func (tree *RebuiltTree) items(ctx context.Context, inc bool) containers.SortedM var leafs []btrfsvol.LogicalAddr for leaf, roots := range tree.acquireNodeIndex(ctx).leafToRoots { - if tree.Roots.HasAny(roots) == inc { + if maps.HaveAnyKeysInCommon(tree.Roots, roots) == inc { leafs = append(leafs, leaf) } } @@ -358,7 +383,7 @@ func (tree *RebuiltTree) RebuiltAddRoot(ctx context.Context, rootNode btrfsvol.L stats.Leafs.N = i progressWriter.Set(stats) - if tree.Roots.HasAny(leafToRoots[leaf]) || !leafToRoots[leaf].Has(rootNode) { + if maps.HaveAnyKeysInCommon(tree.Roots, leafToRoots[leaf]) || !maps.HasKey(leafToRoots[leaf], rootNode) { continue } -- cgit v1.1-4-g5e80 From 7cfdd821b5241c619696d7884fd12c4d2dbc6d49 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 7 Apr 2023 19:38:05 -0600 Subject: btrfsutil: RebuiltTree: Be strict about which KPs to consider valid --- lib/btrfsutil/rebuilt_tree.go | 51 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/lib/btrfsutil/rebuilt_tree.go b/lib/btrfsutil/rebuilt_tree.go index c47c930..177b859 100644 --- a/lib/btrfsutil/rebuilt_tree.go +++ b/lib/btrfsutil/rebuilt_tree.go @@ -98,10 +98,14 @@ func (tree *RebuiltTree) uncachedNodeIndex(ctx context.Context) rebuiltNodeIndex ctx = dlog.WithField(ctx, "btrfs.util.rebuilt-tree.index-nodes", fmt.Sprintf("tree=%v", tree.ID)) indexer := &rebuiltNodeIndexer{ - tree: tree, + tree: tree, + idToTree: make(map[btrfsprim.ObjID]*RebuiltTree), nodeToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots), } + for ancestor := tree; ancestor != nil; ancestor = ancestor.Parent { + indexer.idToTree[ancestor.ID] = ancestor + } ret := rebuiltNodeIndex{ leafToRoots: make(map[btrfsvol.LogicalAddr]rebuiltRoots), @@ -116,7 +120,8 @@ func (tree *RebuiltTree) uncachedNodeIndex(ctx context.Context) rebuiltNodeIndex type rebuiltNodeIndexer struct { // Input - tree *RebuiltTree + tree *RebuiltTree + idToTree map[btrfsprim.ObjID]*RebuiltTree // Output nodeToRoots map[btrfsvol.LogicalAddr]rebuiltRoots @@ -155,26 +160,53 @@ func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.Logic // have already checked for loops. panic("loop") } - if !indexer.tree.isOwnerOK(indexer.tree.forrest.graph.Nodes[node].Owner, indexer.tree.forrest.graph.Nodes[node].Generation) { + nodeInfo := indexer.tree.forrest.graph.Nodes[node] + if !indexer.tree.isOwnerOK(nodeInfo.Owner, nodeInfo.Generation) { indexer.nodeToRoots[node] = nil return } stack = append(stack, node) var roots rebuiltRoots +nextKP: for _, kp := range indexer.tree.forrest.graph.EdgesTo[node] { - if !indexer.tree.isOwnerOK(indexer.tree.forrest.graph.Nodes[kp.FromNode].Owner, indexer.tree.forrest.graph.Nodes[kp.FromNode].Generation) { - continue + // The cheap expectations to check. + if kp.ToLevel != nodeInfo.Level || + kp.ToKey.Compare(nodeInfo.MinItem(indexer.tree.forrest.graph)) > 0 || + kp.ToGeneration != nodeInfo.Generation { + continue nextKP + } + // The MaxItem expectation is only cheap to check if + // the KP isn't in the last slot. If it isn't in the + // last slot, we'll wait to check it until after we've + // indexed kp.FromNode. 
+ if kp.FromSlot+1 < len(indexer.tree.forrest.graph.EdgesFrom[kp.FromNode]) { + kpMaxItem := indexer.tree.forrest.graph.EdgesFrom[kp.FromNode][kp.FromSlot+1].ToKey.Mm() + if kpMaxItem.Compare(nodeInfo.MaxItem(indexer.tree.forrest.graph)) < 0 { + continue nextKP + } + } + // isOwnerOK is O(n) on the number of parents, so + // avoid doing it if possible. + if fromTree := indexer.idToTree[kp.FromTree]; fromTree == nil || !fromTree.isOwnerOK(nodeInfo.Owner, nodeInfo.Generation) { + continue nextKP } + indexer.node(ctx, kp.FromNode, stack) if len(indexer.nodeToRoots[kp.FromNode]) > 0 { - if roots == nil { - roots = make(rebuiltRoots) - } for root, rootInfo := range indexer.nodeToRoots[kp.FromNode] { if kp.FromSlot+1 < len(indexer.tree.forrest.graph.EdgesFrom[kp.FromNode]) { rootInfo.loMaxItem = indexer.tree.forrest.graph.EdgesFrom[kp.FromNode][kp.FromSlot+1].ToKey.Mm() rootInfo.hiMaxItem = rootInfo.loMaxItem + } else { + // OK, now check the MaxItem expectation. + // + // We'll use the hiMaxItem, because it only needs to be + // valid in *one* of the paths to this node. + kpMaxItem := rootInfo.hiMaxItem + if kpMaxItem.Compare(nodeInfo.MaxItem(indexer.tree.forrest.graph)) < 0 { + continue nextKP + } } oldRootInfo, ok := roots[root] if ok && rootInfo.loMaxItem.Compare(oldRootInfo.loMaxItem) > 0 { @@ -183,6 +215,9 @@ func (indexer *rebuiltNodeIndexer) node(ctx context.Context, node btrfsvol.Logic if ok && rootInfo.hiMaxItem.Compare(oldRootInfo.hiMaxItem) < 0 { rootInfo.hiMaxItem = oldRootInfo.hiMaxItem } + if roots == nil { + roots = make(rebuiltRoots) + } roots[root] = rootInfo } } -- cgit v1.1-4-g5e80 From 1879a68976052d06cc62473a0712df9ce89b5013 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 10 Apr 2023 10:03:06 -0600 Subject: rebuildtrees: Fuss with the settledItemQueue order to reduce cache-misses --- cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go | 181 +++++++++++++++++++++++++- cmd/btrfs-rec/inspect/rebuildtrees/scan.go | 39 +++++- lib/textui/log.go | 2 + 3 files changed, 209 insertions(+), 13 deletions(-) diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go index 4787dbf..d1ccfac 100644 --- a/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go +++ b/cmd/btrfs-rec/inspect/rebuildtrees/rebuild.go @@ -224,6 +224,153 @@ func (o *rebuilder) processAddedItemQueue(ctx context.Context) error { return nil } +type itemToVisit struct { + SortTreeID btrfsprim.ObjID // Use this tree ID for sorting, but not lookups + keyAndTree + RefNum int // Only for EXTENT_ITEM and METADATA_ITEM +} + +func (k itemToVisit) String() string { + if k.TreeID == btrfsprim.EXTENT_TREE_OBJECTID && + (k.ItemType == btrfsprim.EXTENT_ITEM_KEY || k.ItemType == btrfsprim.METADATA_ITEM_KEY) { + return textui.Sprintf("%v#%d", k.keyAndTree, k.RefNum) + } + return textui.Sprintf("%v", k.keyAndTree) +} + +func (a itemToVisit) Compare(b itemToVisit) int { + if d := containers.NativeCompare(a.SortTreeID, b.SortTreeID); d != 0 { + return d + } + if d := a.keyAndTree.Compare(b.keyAndTree); d != 0 { + return d + } + return containers.NativeCompare(a.RefNum, b.RefNum) +} + +// sortSettledItemQueue is like a the usual simple by-key sort; but +// applies a different sort-order to members of the EXTENT_TREE. It +// sorts those members by the FS trees of the referencing inodes, +// rather than by the laddr of the extent being referenced. This +// greatly reduces the number of .RebuiltAcquireItems() cache-misses. 
+func (o *rebuilder) sortSettledItemQueue(ctx context.Context, unorderedQueue containers.Set[keyAndTree]) []itemToVisit { + // Like many problems, the trick isn't answering the question, + // it's asking the right question. + // + // "Naively", the problem might be stated as: + // + // Definitions: + // + // An "item" contains a set of 0 or more (`uint64`) "tree + // IDs". "Processing" an item does a cache-load operation + // (from a replacement cache) for each tree ID. + // + // Problem: + // + // Given a list of items, sort the list in a manor that + // minimizes cache-misses when processing the items in the + // list in order. Does the cache size or cache + // replacement policy affect what the optimal order is? + // + // Discussion: + // + // Put another way, sort the list such that items + // containing the same tree IDs are near to eachother. If + // each item only contained 1 tree ID, this would be + // simple: sort by that tree ID. The difficulty of the + // question is how to weight each tree ID when items + // contain multiple; if an item contains tree IDs 'A' and + // 'B', and putting it near other items with 'A' if that + // means putting it farther from other items with 'B', + // when is it worth it to do so? + // + // The most obvious approach that is independent of the cache + // size/policy is to minimize the total distance between items + // within the same set. It turns out that this is the + // "Minimum Linear Arrangement" problem ("MinLA"), which is + // NP-hard. But, if you were paying attention, it's not quite + // MinLA; in our once two items are far enough apart that a + // cache eviction happens between them, there's no cost to + // moving them farther apart. And continuing to try to keep + // them close (instead of giving up on them) results in + // sub-optimal arrangements. So not only is MinLA + // computationally expensive for us to try to approximate a + // solution for, it won't actually give us a very good + // solution! + // + // So you might think "Ah, the trick is to not ask MinLA, the + // trick is to ask this MinLA-with-capped-cost question!" But + // we can find an even better question! + // + // Some concrete numbers to help with thinking about this: In + // my test trees, the maximum number of trees per item is 33, + // and slowdown from areas of the queue with few cache misses + // to areas where the MinLA approximation does poorly is + // around ~300×. And I don't think it's possible to come up + // with a better solution without going O(n^2), which is + // prohibitive when there are 4 million items in the + // EXTENT_TREE. + // + // The *right* question involves backing up and revisiting the + // assumption that it's *items* that we're sorting. + // + // Instead, let's allow items in the EXTENT_TREE to be visited + // more than once; have an entry in the queue for each + // ExtentDataRef within an item. Sure, that'll cause some + // inefficiency because EXTENT_ITEMs and METADATA_ITEMs will + // need to be read more than once. But that's a ~30× + // slowdown, and allows us to just sort those queue-entries + // near the trees being back-referenced. A ~30× slowdown is a + // heck of a lot better than a ~300× slowdown. And we don't + // have to try to solve a problem that's NP-hard. + + ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.process.substep", "sort") + dlog.Info(ctx, "building ordered queue...") + + dlog.Infof(ctx, "... 
walking %d items...", len(unorderedQueue)) + + // Don't worry about bailing if there is a failure to get the + // EXTENT_TREE; if that fails, then there can't be any items + // in the EXTENT_TREE for us to have to handle special, and + // all of the following code will fall through common-path. + extentTree := o.rebuilt.RebuiltTree(ctx, btrfsprim.EXTENT_TREE_OBJECTID) + var extentItems *containers.SortedMap[btrfsprim.Key, btrfsutil.ItemPtr] + if extentTree != nil { + extentItems = extentTree.RebuiltAcquireItems(ctx) + defer extentTree.RebuiltReleaseItems() + } + + orderedQueue := make([]itemToVisit, 0, len(unorderedQueue)) + for itemKey := range unorderedQueue { + if itemKey.TreeID == btrfsprim.EXTENT_TREE_OBJECTID && (itemKey.ItemType == btrfsprim.EXTENT_ITEM_KEY || + itemKey.ItemType == btrfsprim.METADATA_ITEM_KEY || + itemKey.ItemType == btrfsprim.EXTENT_DATA_REF_KEY) { + ptr, _ := extentItems.Load(itemKey.Key) + for i, treeID := range o.scan.DataBackrefs[ptr] { + orderedQueue = append(orderedQueue, itemToVisit{ + keyAndTree: itemKey, + SortTreeID: treeID, + RefNum: i, + }) + } + } else { + orderedQueue = append(orderedQueue, itemToVisit{ + keyAndTree: itemKey, + SortTreeID: itemKey.TreeID, + }) + } + } + + dlog.Infof(ctx, "... sorting %d queue entries...", len(orderedQueue)) + sort.Slice(orderedQueue, func(i, j int) bool { + return orderedQueue[i].Compare(orderedQueue[j]) < 0 + }) + + dlog.Info(ctx, "... done") + + return orderedQueue +} + type processItemStats struct { textui.Portion[int] NumAugments int @@ -241,11 +388,8 @@ func (s processItemStats) String() string { func (o *rebuilder) processSettledItemQueue(ctx context.Context) error { ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.substep", "process-items") - queue := maps.Keys(o.settledItemQueue) + queue := o.sortSettledItemQueue(ctx, o.settledItemQueue) o.settledItemQueue = make(containers.Set[keyAndTree]) - sort.Slice(queue, func(i, j int) bool { - return queue[i].Compare(queue[j]) < 0 - }) var progress processItemStats progress.D = len(queue) @@ -254,23 +398,46 @@ func (o *rebuilder) processSettledItemQueue(ctx context.Context) error { defer progressWriter.Done() ctx = dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.substep.progress", &progress) + progressWriter.Set(progress) type keyAndBody struct { - keyAndTree + itemToVisit Body btrfsitem.Item } itemChan := make(chan keyAndBody, textui.Tunable(300)) // average items-per-node≈100; let's have a buffer of ~3 nodes grp := dgroup.NewGroup(ctx, dgroup.GroupConfig{}) grp.Go("io", func(ctx context.Context) error { defer close(itemChan) + nextKey: for _, key := range queue { if err := ctx.Err(); err != nil { return err } ctx := dlog.WithField(ctx, "btrfs.inspect.rebuild-trees.rebuild.process.item", key) item := keyAndBody{ - keyAndTree: key, - Body: o.rebuilt.RebuiltTree(ctx, key.TreeID).ReadItem(ctx, key.Key), + itemToVisit: key, + Body: o.rebuilt.RebuiltTree(ctx, key.TreeID).ReadItem(ctx, key.Key), + } + if key.TreeID == btrfsprim.EXTENT_TREE_OBJECTID && + (key.ItemType == btrfsprim.EXTENT_ITEM_KEY || key.ItemType == btrfsprim.METADATA_ITEM_KEY) { + switch itemBody := item.Body.(type) { + case *btrfsitem.Extent: + item.Body = itemBody.Refs[key.RefNum].Body + if item.Body == nil { + continue nextKey + } + case *btrfsitem.Metadata: + item.Body = itemBody.Refs[key.RefNum].Body + if item.Body == nil { + continue nextKey + } + case *btrfsitem.Error: + // do nothing + default: + // This is a panic because the item decoder should not emit a new + // type to 
ref.Body without this code also being updated. + panic(fmt.Errorf("should not happen: unexpected type %T for %v", itemBody, key.ItemType)) + } } select { case itemChan <- item: diff --git a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go index 7518896..72557ea 100644 --- a/cmd/btrfs-rec/inspect/rebuildtrees/scan.go +++ b/cmd/btrfs-rec/inspect/rebuildtrees/scan.go @@ -31,12 +31,21 @@ type FlagsAndErr struct { Err error } +// ExtentDataRefPtr is a pointer to a *btrfsitem.ExtentDataRef, +// whether it be to an EXTENT_DATA_REF item proper, or to an inline +// ref inside of another EXTENT_ITEM or METADATA_ITEM. +type ExtentDataRefPtr struct { + btrfsutil.ItemPtr + RefNum int // Only for EXTENT_ITEM and METADATA_ITEM +} + type ScanDevicesResult struct { Graph btrfsutil.Graph - Flags map[btrfsutil.ItemPtr]FlagsAndErr // INODE_ITEM - Names map[btrfsutil.ItemPtr][]byte // DIR_INDEX - Sizes map[btrfsutil.ItemPtr]SizeAndErr // EXTENT_CSUM and EXTENT_DATA + Flags map[btrfsutil.ItemPtr]FlagsAndErr // INODE_ITEM + Names map[btrfsutil.ItemPtr][]byte // DIR_INDEX + Sizes map[btrfsutil.ItemPtr]SizeAndErr // EXTENT_CSUM and EXTENT_DATA + DataBackrefs map[btrfsutil.ItemPtr][]btrfsprim.ObjID // EXTENT_DATA_REF, EXTENT_ITEM, and METADATA_ITEM } func ScanDevices(_ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.LogicalAddr) (ScanDevicesResult, error) { @@ -52,9 +61,11 @@ func ScanDevices(_ctx context.Context, fs *btrfs.FS, nodeList []btrfsvol.Logical ctx = dlog.WithField(_ctx, "btrfs.inspect.rebuild-trees.read.substep", "read-roots") ret := ScanDevicesResult{ Graph: btrfsutil.NewGraph(ctx, *sb), - Flags: make(map[btrfsutil.ItemPtr]FlagsAndErr), - Names: make(map[btrfsutil.ItemPtr][]byte), - Sizes: make(map[btrfsutil.ItemPtr]SizeAndErr), + + Flags: make(map[btrfsutil.ItemPtr]FlagsAndErr), + Names: make(map[btrfsutil.ItemPtr][]byte), + Sizes: make(map[btrfsutil.ItemPtr]SizeAndErr), + DataBackrefs: make(map[btrfsutil.ItemPtr][]btrfsprim.ObjID), } // read-nodes ////////////////////////////////////////////////////////////////// @@ -128,6 +139,22 @@ func (o *ScanDevicesResult) insertNode(node *btrfstree.Node) { Size: uint64(size), Err: err, } + case *btrfsitem.Extent: + o.DataBackrefs[ptr] = make([]btrfsprim.ObjID, len(itemBody.Refs)) + for i, ref := range itemBody.Refs { + if refBody, ok := ref.Body.(*btrfsitem.ExtentDataRef); ok { + o.DataBackrefs[ptr][i] = refBody.Root + } + } + case *btrfsitem.Metadata: + o.DataBackrefs[ptr] = make([]btrfsprim.ObjID, len(itemBody.Refs)) + for i, ref := range itemBody.Refs { + if refBody, ok := ref.Body.(*btrfsitem.ExtentDataRef); ok { + o.DataBackrefs[ptr][i] = refBody.Root + } + } + case *btrfsitem.ExtentDataRef: + o.DataBackrefs[ptr] = []btrfsprim.ObjID{itemBody.Root} case *btrfsitem.Error: switch item.Key.ItemType { case btrfsprim.INODE_ITEM_KEY: diff --git a/lib/textui/log.go b/lib/textui/log.go index e5d3f60..25bb4be 100644 --- a/lib/textui/log.go +++ b/lib/textui/log.go @@ -319,6 +319,8 @@ func fieldOrd(key string) int { case "btrfs.inspect.rebuild-trees.rebuild.settle.item": return -25 // step=rebuild, substep=process-items (2b/3) + case "btrfs.inspect.rebuild-trees.rebuild.process.substep": + return -26 case "btrfs.inspect.rebuild-trees.rebuild.process.item": return -25 // step=rebuild, substep=apply-augments (3/3) -- cgit v1.1-4-g5e80
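
The queue-reordering idea in that last commit stands on its own: rather than visiting EXTENT_TREE entries in key order, it emits one queue entry per back-reference and groups entries by the tree ID of the referencing inode, so lookups against the same rebuilt tree land next to each other and that tree's item index stays cached instead of being evicted and rebuilt. Below is a minimal, self-contained Go sketch of that sort order only, using simplified hypothetical types (objID, key, queueEntry) in place of the real btrfsprim/btrfsutil ones; the actual code sorts itemToVisit values via keyAndTree.Compare and containers.NativeCompare, so treat this as an illustration of the ordering, not the project's implementation.

package main

import (
	"fmt"
	"sort"
)

// Simplified stand-ins for btrfsprim.ObjID and btrfsprim.Key; the
// real queue-entry type is itemToVisit in rebuild.go.
type objID uint64

type key struct {
	ObjectID objID
	ItemType uint8
	Offset   uint64
}

// queueEntry mirrors the idea of itemToVisit: TreeID/Key identify the
// item to look up, SortTreeID is used only for ordering (the
// referencing FS tree for extent back-references), and RefNum selects
// one inline ref within an EXTENT_ITEM or METADATA_ITEM.
type queueEntry struct {
	SortTreeID objID
	TreeID     objID
	Key        key
	RefNum     int
}

const (
	extentTree objID = 2   // EXTENT_TREE_OBJECTID
	fsTreeA    objID = 256 // first subvolume tree
	fsTreeB    objID = 257 // second subvolume tree

	extentItemKey uint8 = 168 // EXTENT_ITEM_KEY
	inodeItemKey  uint8 = 1   // INODE_ITEM_KEY
)

func main() {
	// An unordered settled-item queue: two extents, each referenced
	// from a different FS tree, plus an ordinary FS-tree item.
	queue := []queueEntry{
		{SortTreeID: fsTreeB, TreeID: extentTree, Key: key{0x2000, extentItemKey, 4096}, RefNum: 0},
		{SortTreeID: fsTreeA, TreeID: fsTreeA, Key: key{261, inodeItemKey, 0}},
		{SortTreeID: fsTreeA, TreeID: extentTree, Key: key{0x1000, extentItemKey, 4096}, RefNum: 0},
	}

	// Sort by the referencing tree first, then by (tree, key, ref).
	// Entries that consult the same rebuilt tree become adjacent, so
	// its item index is reused across consecutive lookups, at the
	// cost of re-reading shared EXTENT_ITEMs once per reference.
	sort.Slice(queue, func(i, j int) bool {
		a, b := queue[i], queue[j]
		switch {
		case a.SortTreeID != b.SortTreeID:
			return a.SortTreeID < b.SortTreeID
		case a.TreeID != b.TreeID:
			return a.TreeID < b.TreeID
		case a.Key != b.Key:
			return a.Key.ObjectID < b.Key.ObjectID ||
				(a.Key.ObjectID == b.Key.ObjectID && a.Key.ItemType < b.Key.ItemType) ||
				(a.Key.ObjectID == b.Key.ObjectID && a.Key.ItemType == b.Key.ItemType && a.Key.Offset < b.Key.Offset)
		default:
			return a.RefNum < b.RefNum
		}
	})

	for _, e := range queue {
		fmt.Printf("lookup tree=%d key=%v ref=%d (grouped under tree %d)\n",
			e.TreeID, e.Key, e.RefNum, e.SortTreeID)
	}
}

The trade-off this encodes is the one spelled out in the commit's own comment: reading an EXTENT_ITEM once per back-reference instead of once total is a modest constant-factor overhead that buys adjacency with the referencing trees, avoiding both the large cache-miss slowdown of key-order visiting and any attempt to approximate the NP-hard minimum-linear-arrangement problem.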