From af795797565e642a9e7fd2a962add8e8b4d5756b Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Wed, 21 Dec 2022 01:11:05 -0700
Subject: rebuildnodes/btrees: Implement a new rebuilt-trees system

This will replace its use of btrfsutil.NewBrokenTrees.
---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 343 +++++++++++++++++++++
 1 file changed, 343 insertions(+)
 create mode 100644 lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
new file mode 100644
index 0000000..28d93b9
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -0,0 +1,343 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package btrees
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/datawire/dlib/dlog"
+
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph"
+	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
+	"git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+	"git.lukeshu.com/btrfs-progs-ng/lib/slices"
+	"git.lukeshu.com/btrfs-progs-ng/lib/textui"
+)
+
+type itemPtr struct {
+	Node btrfsvol.LogicalAddr
+	Idx  int
+}
+
+type rebuiltTree struct {
+	// static
+	ID     btrfsprim.ObjID
+	UUID   btrfsprim.UUID
+	Parent *rebuiltTree
+
+	// mutable
+	Roots containers.Set[btrfsvol.LogicalAddr]
+	Items containers.SortedMap[btrfsprim.Key, itemPtr]
+}
+
+// isOwnerOK returns whether it is permissible for a node with
+// .Head.Owner=owner to be in this tree.
+func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
+	for {
+		if t == nil {
+			return false
+		}
+		if owner == t.ID {
+			return true
+		}
+		t = t.Parent
+	}
+}
+
+// RebuiltTrees is an abstraction for rebuilding and accessing
+// potentially broken btrees.
+//
+// It is conceptually a btrfstree.TreeOperator, and adds similar
+// broken-tree handling to btrfsutil.BrokenTrees.  However, the API is
+// different thant btrfstree.TreeOperator, and is much more efficient
+// than btrfsutil.BrokenTrees.
+//
+// The efficiency improvements are possible because of the API
+// differences, which are necessary for how it is used in
+// rebuildnodes:
+//
+//   - it consumes an already-read graph.Graph instead of reading the
+//     graph itself
+//
+//   - it does not use `btrfstree.TreePath`
+//
+//   - it does not keep track of errors encountered in a tree
+//
+// A zero RebuiltTrees is invalid; it must be initialized with
+// NewRebuiltTrees().
+type RebuiltTrees struct {
+	// static
+	rawFile diskio.File[btrfsvol.LogicalAddr]
+	sb      btrfstree.Superblock
+	graph   graph.Graph
+
+	// static callbacks
+	cbAddedItem  func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
+	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool)
+	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
+
+	// mutable
+	trees map[btrfsprim.ObjID]*rebuiltTree
+}
+
+// NewRebuiltTrees returns a new RebuiltTrees instance.  All of the
+// callbacks must be non-nil.
+func NewRebuiltTrees(
+	file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph graph.Graph,
+	cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key),
+	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool),
+	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool),
+) *RebuiltTrees {
+	return &RebuiltTrees{
+		rawFile: file,
+		sb:      sb,
+		graph:   graph,
+
+		cbAddedItem:  cbAddedItem,
+		cbLookupRoot: cbLookupRoot,
+		cbLookupUUID: cbLookupUUID,
+
+		trees: make(map[btrfsprim.ObjID]*rebuiltTree),
+	}
+}
+
+func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] {
+	graphInfo, ok := ts.graph.Nodes[laddr]
+	if !ok {
+		panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr))
+	}
+	ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{
+		LAddr:      containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr},
+		Level:      containers.Optional[uint8]{OK: true, Val: graphInfo.Level},
+		Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: graphInfo.Generation},
+		Owner: func(treeID btrfsprim.ObjID) error {
+			if treeID != graphInfo.Owner {
+				return fmt.Errorf("expected owner=%v but claims to have owner=%v",
+					graphInfo.Owner, treeID)
+			}
+			return nil
+		},
+		MinItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MinItem},
+		MaxItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MaxItem},
+	})
+	if err != nil {
+		panic(fmt.Errorf("should not happen: i/o error: %w", err))
+	}
+	return ref
+}
+
+func walk(graph graph.Graph, root btrfsvol.LogicalAddr, visited containers.Set[btrfsvol.LogicalAddr], fn func(btrfsvol.LogicalAddr) bool) {
+	if _, ok := graph.Nodes[root]; !ok {
+		return
+	}
+	if visited.Has(root) {
+		return
+	}
+	defer visited.Insert(root)
+	if !fn(root) {
+		return
+	}
+	for _, kp := range graph.EdgesFrom[root] {
+		walk(graph, kp.ToNode, visited, fn)
+	}
+}
+
+type rootStats struct {
+	TreeID   btrfsprim.ObjID
+	RootNode btrfsvol.LogicalAddr
+
+	VisitedNodes int
+	TotalNodes   int
+	AddedItems   int
+}
+
+func (s rootStats) String() string {
+	return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)",
+		s.TreeID, s.RootNode,
+		int(100*float64(s.VisitedNodes)/float64(s.TotalNodes)),
+		s.VisitedNodes, s.TotalNodes,
+		s.AddedItems)
+}
+
+// AddRoot adds an additional root node to an existing tree.  It is
+// useful to call .AddRoot() to re-attach part of the tree that has
+// been broken off.
+//
+// It is invalid (panic) to call AddRoot for a tree without having
+// called AddTree first.
+func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, rootNode btrfsvol.LogicalAddr) {
+	tree := ts.trees[treeID]
+	tree.Roots.Insert(rootNode)
+
+	visited := make(containers.Set[btrfsvol.LogicalAddr])
+	progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second)
+	numAdded := 0
+	progress := func() {
+		progressWriter.Set(rootStats{
+			TreeID:       treeID,
+			RootNode:     rootNode,
+			VisitedNodes: len(visited),
+			TotalNodes:   len(ts.graph.Nodes),
+			AddedItems:   numAdded,
+		})
+	}
+	progress()
+	walk(ts.graph, rootNode, visited, func(node btrfsvol.LogicalAddr) bool {
+		progress()
+		if !tree.isOwnerOK(ts.graph.Nodes[node].Owner) {
+			return false
+		}
+		if ts.graph.Nodes[node].Level == 0 {
+			for i, item := range ts.readNode(node).Data.BodyLeaf {
+				if _, exists := tree.Items.Load(item.Key); exists {
+					// This is a panic because I'm not really sure what the best way to
+					// handle this is, and so if this happens I want the program to crash
+					// and force me to figure out how to handle it.
+					panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID))
+				}
+				tree.Items.Store(item.Key, itemPtr{
+					Node: node,
+					Idx:  i,
+				})
+				numAdded++
+				ts.cbAddedItem(ctx, treeID, item.Key)
+			}
+		}
+		return true
+	})
+	progress()
+	progressWriter.Done()
+}
+
+// AddTree initializes the given tree, returning true if it was able
+// to do so, or false if there was a problem and nothing was done.
+// The tree is initialized with the normal root node of the tree.
+//
+// Subsequent calls to AddTree for the same tree are no-ops.
+func (ts *RebuiltTrees) AddTree(ctx context.Context, treeID btrfsprim.ObjID) (ok bool) {
+	return ts.addTree(ctx, treeID, nil)
+}
+
+func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, stack []btrfsprim.ObjID) (ok bool) {
+	if _, ok := ts.trees[treeID]; ok {
+		return true
+	}
+	if slices.Contains(treeID, stack) {
+		return false
+	}
+	tree := &rebuiltTree{
+		ID:    treeID,
+		Roots: make(containers.Set[btrfsvol.LogicalAddr]),
+	}
+	var root btrfsvol.LogicalAddr
+	switch treeID {
+	case btrfsprim.ROOT_TREE_OBJECTID:
+		root = ts.sb.RootTree
+	case btrfsprim.CHUNK_TREE_OBJECTID:
+		root = ts.sb.ChunkTree
+	case btrfsprim.TREE_LOG_OBJECTID:
+		root = ts.sb.LogTree
+	case btrfsprim.BLOCK_GROUP_TREE_OBJECTID:
+		root = ts.sb.BlockGroupRoot
+	default:
+		stack := append(stack, treeID)
+		if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) {
+			return false
+		}
+		rootItem, ok := ts.cbLookupRoot(ctx, treeID)
+		if !ok {
+			return false
+		}
+		root = rootItem.ByteNr
+		tree.UUID = rootItem.UUID
+		if rootItem.ParentUUID != (btrfsprim.UUID{}) {
+			if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) {
+				return false
+			}
+			parentID, ok := ts.cbLookupUUID(ctx, rootItem.ParentUUID)
+			if !ok {
+				return false
+			}
+			if !ts.addTree(ctx, parentID, append(stack, treeID)) {
+				return false
+			}
+			tree.Parent = ts.trees[parentID]
+		}
+	}
+	ts.trees[treeID] = tree
+	if root != 0 {
+		ts.AddRoot(ctx, treeID, root)
+	}
+	return true
+}
+
+// Load reads an item from a tree.
+//
+// It is not nescessary to call AddTree for that tree first; Load will
+// call it for you.
+func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key btrfsprim.Key) (item btrfsitem.Item, ok bool) {
+	if !ts.AddTree(ctx, treeID) {
+		return nil, false
+	}
+	ptr, ok := ts.trees[treeID].Items.Load(key)
+	if !ok {
+		return nil, false
+	}
+	return ts.readNode(ptr.Node).Data.BodyLeaf[ptr.Idx].Body, true
+}
+
+// Search searches for an item from a tree.
+//
+// It is not nescessary to call AddTree for that tree first; Search
+// will call it for you.
+func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) (key btrfsprim.Key, ok bool) {
+	if !ts.AddTree(ctx, treeID) {
+		return btrfsprim.Key{}, false
+	}
+	k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ itemPtr) int {
+		return fn(k)
+	})
+	return k, ok
+}
+
+// Search searches for a range of items from a tree.
+//
+// It is not nescessary to call AddTree for that tree first; SearchAll
+// will call it for you.
+func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, fn func(btrfsprim.Key) int) []btrfsprim.Key {
+	if !ts.AddTree(ctx, treeID) {
+		return nil
+	}
+	kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ itemPtr) int {
+		return fn(k)
+	})
+	if len(kvs) == 0 {
+		return nil
+	}
+	ret := make([]btrfsprim.Key, len(kvs))
+	for i := range kvs {
+		ret[i] = kvs[i].K
+	}
+	return ret
+}
+
+// ListRoots returns a listing of all initialized trees and their root
+// nodes.
+//
+// Do not mutate the set of roots for a tree; it is a pointer to the
+// RebuiltTrees' internal set!
+func (ts *RebuiltTrees) ListRoots() map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr] {
+	ret := make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], len(ts.trees))
+	for treeID := range ts.trees {
+		ret[treeID] = ts.trees[treeID].Roots
+	}
+	return ret
+}
-- 
cgit v1.2.3-2-g168b


From 3f88bf0926046b245844a72a2a0c97654a410f0a Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Wed, 21 Dec 2022 02:41:45 -0700
Subject: rebuildnodes/btrees: Cache nodes

---
 .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go     | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 28d93b9..dea7ec3 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -85,7 +85,8 @@ type RebuiltTrees struct {
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
 
 	// mutable
-	trees map[btrfsprim.ObjID]*rebuiltTree
+	trees     map[btrfsprim.ObjID]*rebuiltTree
+	nodeCache *containers.LRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]]
 }
 
 // NewRebuiltTrees returns a new RebuiltTrees instance.  All of the
@@ -105,15 +106,21 @@ func NewRebuiltTrees(
 		cbLookupRoot: cbLookupRoot,
 		cbLookupUUID: cbLookupUUID,
 
-		trees: make(map[btrfsprim.ObjID]*rebuiltTree),
+		trees:     make(map[btrfsprim.ObjID]*rebuiltTree),
+		nodeCache: containers.NewLRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]](8),
 	}
 }
 
 func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] {
+	if cached, ok := ts.nodeCache.Get(laddr); ok {
+		return cached
+	}
+
 	graphInfo, ok := ts.graph.Nodes[laddr]
 	if !ok {
 		panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr))
 	}
+
 	ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{
 		LAddr:      containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr},
 		Level:      containers.Optional[uint8]{OK: true, Val: graphInfo.Level},
@@ -131,6 +138,9 @@ func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvo
 	if err != nil {
 		panic(fmt.Errorf("should not happen: i/o error: %w", err))
 	}
+
+	ts.nodeCache.Add(laddr, ref)
+
 	return ref
 }
 
-- 
cgit v1.2.3-2-g168b


From 083444569ac882c470da8adee8059989ba176553 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Wed, 21 Dec 2022 03:59:49 -0700
Subject: rebuildnodes/btrees: Index all (orphaned) leafs

---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 195 +++++++++++++++------
 1 file changed, 138 insertions(+), 57 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index dea7ec3..6e68a84 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -15,9 +15,10 @@ import (
 	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
 	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
 	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
-	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph"
+	pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph"
 	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
 	"git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+	"git.lukeshu.com/btrfs-progs-ng/lib/maps"
 	"git.lukeshu.com/btrfs-progs-ng/lib/slices"
 	"git.lukeshu.com/btrfs-progs-ng/lib/textui"
 )
@@ -33,6 +34,9 @@ type rebuiltTree struct {
 	UUID   btrfsprim.UUID
 	Parent *rebuiltTree
 
+	// all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree
+	leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]
+
 	// mutable
 	Roots containers.Set[btrfsvol.LogicalAddr]
 	Items containers.SortedMap[btrfsprim.Key, itemPtr]
@@ -40,15 +44,15 @@ type rebuiltTree struct {
 
 // isOwnerOK returns whether it is permissible for a node with
 // .Head.Owner=owner to be in this tree.
-func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
+func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
 	for {
-		if t == nil {
+		if tree == nil {
 			return false
 		}
-		if owner == t.ID {
+		if owner == tree.ID {
 			return true
 		}
-		t = t.Parent
+		tree = tree.Parent
 	}
 }
 
@@ -71,13 +75,19 @@ func (t *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
 //
 //   - it does not keep track of errors encountered in a tree
 //
+// Additionally, it provides a piece of functionality that
+// btrfsutil.BrokenTrees does not:
+//
+//   - it provides a .LeafToRoots() method to advise on what
+//     additional roots should be added
+//
 // A zero RebuiltTrees is invalid; it must be initialized with
 // NewRebuiltTrees().
 type RebuiltTrees struct {
 	// static
 	rawFile diskio.File[btrfsvol.LogicalAddr]
 	sb      btrfstree.Superblock
-	graph   graph.Graph
+	graph   pkggraph.Graph
 
 	// static callbacks
 	cbAddedItem  func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
@@ -92,7 +102,7 @@ type RebuiltTrees struct {
 // NewRebuiltTrees returns a new RebuiltTrees instance.  All of the
 // callbacks must be non-nil.
 func NewRebuiltTrees(
-	file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph graph.Graph,
+	file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph pkggraph.Graph,
 	cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key),
 	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool),
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool),
@@ -144,36 +154,20 @@ func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvo
 	return ref
 }
 
-func walk(graph graph.Graph, root btrfsvol.LogicalAddr, visited containers.Set[btrfsvol.LogicalAddr], fn func(btrfsvol.LogicalAddr) bool) {
-	if _, ok := graph.Nodes[root]; !ok {
-		return
-	}
-	if visited.Has(root) {
-		return
-	}
-	defer visited.Insert(root)
-	if !fn(root) {
-		return
-	}
-	for _, kp := range graph.EdgesFrom[root] {
-		walk(graph, kp.ToNode, visited, fn)
-	}
-}
-
 type rootStats struct {
 	TreeID   btrfsprim.ObjID
 	RootNode btrfsvol.LogicalAddr
 
-	VisitedNodes int
-	TotalNodes   int
-	AddedItems   int
+	DoneLeafs  int
+	TotalLeafs int
+	AddedItems int
 }
 
 func (s rootStats) String() string {
 	return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)",
 		s.TreeID, s.RootNode,
-		int(100*float64(s.VisitedNodes)/float64(s.TotalNodes)),
-		s.VisitedNodes, s.TotalNodes,
+		int(100*float64(s.DoneLeafs)/float64(s.TotalLeafs)),
+		s.DoneLeafs, s.TotalLeafs,
 		s.AddedItems)
 }
 
@@ -187,43 +181,40 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 	tree := ts.trees[treeID]
 	tree.Roots.Insert(rootNode)
 
-	visited := make(containers.Set[btrfsvol.LogicalAddr])
 	progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second)
 	numAdded := 0
-	progress := func() {
+	progress := func(done int) {
 		progressWriter.Set(rootStats{
-			TreeID:       treeID,
-			RootNode:     rootNode,
-			VisitedNodes: len(visited),
-			TotalNodes:   len(ts.graph.Nodes),
-			AddedItems:   numAdded,
+			TreeID:     treeID,
+			RootNode:   rootNode,
+			DoneLeafs:  done,
+			TotalLeafs: len(tree.leafToRoots),
+			AddedItems: numAdded,
 		})
 	}
-	progress()
-	walk(ts.graph, rootNode, visited, func(node btrfsvol.LogicalAddr) bool {
-		progress()
-		if !tree.isOwnerOK(ts.graph.Nodes[node].Owner) {
-			return false
+	for i, leaf := range maps.SortedKeys(tree.leafToRoots) {
+		progress(i)
+		roots := tree.leafToRoots[leaf]
+		if !roots.Has(rootNode) {
+			continue
 		}
-		if ts.graph.Nodes[node].Level == 0 {
-			for i, item := range ts.readNode(node).Data.BodyLeaf {
-				if _, exists := tree.Items.Load(item.Key); exists {
-					// This is a panic because I'm not really sure what the best way to
-					// handle this is, and so if this happens I want the program to crash
-					// and force me to figure out how to handle it.
-					panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID))
-				}
-				tree.Items.Store(item.Key, itemPtr{
-					Node: node,
-					Idx:  i,
-				})
-				numAdded++
-				ts.cbAddedItem(ctx, treeID, item.Key)
+		for j, item := range ts.readNode(leaf).Data.BodyLeaf {
+			if _, exists := tree.Items.Load(item.Key); exists {
+				// This is a panic because I'm not really sure what the best way to
+				// handle this is, and so if this happens I want the program to crash
+				// and force me to figure out how to handle it.
+				panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID))
 			}
+			tree.Items.Store(item.Key, itemPtr{
+				Node: leaf,
+				Idx:  j,
+			})
+			numAdded++
+			ts.cbAddedItem(ctx, treeID, item.Key)
+			progress(i)
 		}
-		return true
-	})
-	progress()
+	}
+	progress(len(tree.leafToRoots))
 	progressWriter.Done()
 }
 
@@ -243,6 +234,7 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta
 	if slices.Contains(treeID, stack) {
 		return false
 	}
+
 	tree := &rebuiltTree{
 		ID:    treeID,
 		Roots: make(containers.Set[btrfsvol.LogicalAddr]),
@@ -282,13 +274,81 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta
 			tree.Parent = ts.trees[parentID]
 		}
 	}
+	tree.indexLeafs(ctx, ts.graph)
+
 	ts.trees[treeID] = tree
 	if root != 0 {
 		ts.AddRoot(ctx, treeID, root)
 	}
+
 	return true
 }
 
+type indexStats struct {
+	TreeID     btrfsprim.ObjID
+	DoneNodes  int
+	TotalNodes int
+}
+
+func (s indexStats) String() string {
+	return fmt.Sprintf("tree %v: indexing leaf nodes: %v%% (%v/%v)",
+		s.TreeID,
+		int(100*float64(s.DoneNodes)/float64(s.TotalNodes)),
+		s.DoneNodes, s.TotalNodes)
+}
+
+func (tree *rebuiltTree) indexLeafs(ctx context.Context, graph pkggraph.Graph) {
+	nodeToRoots := make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr])
+	progressWriter := textui.NewProgress[indexStats](ctx, dlog.LogLevelInfo, 1*time.Second)
+	progress := func() {
+		progressWriter.Set(indexStats{
+			TreeID:     tree.ID,
+			DoneNodes:  len(nodeToRoots),
+			TotalNodes: len(graph.Nodes),
+		})
+	}
+	progress()
+	for _, node := range maps.SortedKeys(graph.Nodes) {
+		tree.indexNode(graph, node, nodeToRoots, progress, nil)
+	}
+	progressWriter.Done()
+
+	tree.leafToRoots = make(map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr])
+	for node, roots := range nodeToRoots {
+		if graph.Nodes[node].Level == 0 && len(roots) > 0 {
+			tree.leafToRoots[node] = roots
+		}
+	}
+}
+
+func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAddr, index map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr], progress func(), stack []btrfsvol.LogicalAddr) {
+	defer progress()
+	if _, done := index[node]; done {
+		return
+	}
+	if slices.Contains(node, stack) {
+		return
+	}
+	if !tree.isOwnerOK(graph.Nodes[node].Owner) {
+		index[node] = nil
+		return
+	}
+	kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool {
+		return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner)
+	})
+	if len(kps) == 0 {
+		index[node] = containers.NewSet[btrfsvol.LogicalAddr](node)
+		return
+	}
+	stack = append(stack, node)
+	roots := make(containers.Set[btrfsvol.LogicalAddr])
+	for _, kp := range kps {
+		tree.indexNode(graph, kp.FromNode, index, progress, stack)
+		roots.InsertFrom(index[kp.FromNode])
+	}
+	index[node] = roots
+}
+
 // Load reads an item from a tree.
 //
 // It is not nescessary to call AddTree for that tree first; Load will
@@ -339,6 +399,27 @@ func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, f
 	return ret
 }
 
+// LeafToRoots returns the list of potential roots (to pass to
+// .AddRoot) that include a given leaf-node.
+//
+// It is not nescessary to call AddTree for the tree first;
+// LeafToRoots will call it for you.
+func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID, leaf btrfsvol.LogicalAddr) containers.Set[btrfsvol.LogicalAddr] {
+	if !ts.AddTree(ctx, treeID) {
+		return nil
+	}
+	ret := make(containers.Set[btrfsvol.LogicalAddr])
+	for root := range ts.trees[treeID].leafToRoots[leaf] {
+		if !ts.trees[treeID].Roots.Has(root) {
+			ret.Insert(root)
+		}
+	}
+	if len(ret) == 0 {
+		return nil
+	}
+	return ret
+}
+
 // ListRoots returns a listing of all initialized trees and their root
 // nodes.
 //
-- 
cgit v1.2.3-2-g168b


From 30be1eacbe4ff253c82c6e6163234dc20b4de550 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Wed, 21 Dec 2022 05:30:25 -0700
Subject: rebuildnodes: Refactor existing key-io code in to a sub-package

---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 72 +++++-----------------
 1 file changed, 16 insertions(+), 56 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 6e68a84..613f3ca 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -16,18 +16,13 @@ import (
 	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
 	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
 	pkggraph "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/keyio"
 	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
-	"git.lukeshu.com/btrfs-progs-ng/lib/diskio"
 	"git.lukeshu.com/btrfs-progs-ng/lib/maps"
 	"git.lukeshu.com/btrfs-progs-ng/lib/slices"
 	"git.lukeshu.com/btrfs-progs-ng/lib/textui"
 )
 
-type itemPtr struct {
-	Node btrfsvol.LogicalAddr
-	Idx  int
-}
-
 type rebuiltTree struct {
 	// static
 	ID     btrfsprim.ObjID
@@ -39,7 +34,7 @@ type rebuiltTree struct {
 
 	// mutable
 	Roots containers.Set[btrfsvol.LogicalAddr]
-	Items containers.SortedMap[btrfsprim.Key, itemPtr]
+	Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr]
 }
 
 // isOwnerOK returns whether it is permissible for a node with
@@ -85,9 +80,9 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
 // NewRebuiltTrees().
 type RebuiltTrees struct {
 	// static
-	rawFile diskio.File[btrfsvol.LogicalAddr]
-	sb      btrfstree.Superblock
-	graph   pkggraph.Graph
+	sb    btrfstree.Superblock
+	graph pkggraph.Graph
+	keyIO keyio.Handle
 
 	// static callbacks
 	cbAddedItem  func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
@@ -95,63 +90,28 @@ type RebuiltTrees struct {
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
 
 	// mutable
-	trees     map[btrfsprim.ObjID]*rebuiltTree
-	nodeCache *containers.LRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]]
+	trees map[btrfsprim.ObjID]*rebuiltTree
 }
 
 // NewRebuiltTrees returns a new RebuiltTrees instance.  All of the
 // callbacks must be non-nil.
 func NewRebuiltTrees(
-	file diskio.File[btrfsvol.LogicalAddr], sb btrfstree.Superblock, graph pkggraph.Graph,
+	sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle,
 	cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key),
 	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool),
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool),
 ) *RebuiltTrees {
 	return &RebuiltTrees{
-		rawFile: file,
-		sb:      sb,
-		graph:   graph,
+		sb:    sb,
+		graph: graph,
+		keyIO: keyIO,
 
 		cbAddedItem:  cbAddedItem,
 		cbLookupRoot: cbLookupRoot,
 		cbLookupUUID: cbLookupUUID,
 
-		trees:     make(map[btrfsprim.ObjID]*rebuiltTree),
-		nodeCache: containers.NewLRUCache[btrfsvol.LogicalAddr, *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node]](8),
-	}
-}
-
-func (ts *RebuiltTrees) readNode(laddr btrfsvol.LogicalAddr) *diskio.Ref[btrfsvol.LogicalAddr, btrfstree.Node] {
-	if cached, ok := ts.nodeCache.Get(laddr); ok {
-		return cached
-	}
-
-	graphInfo, ok := ts.graph.Nodes[laddr]
-	if !ok {
-		panic(fmt.Errorf("should not happen: node@%v is not mentioned in the in-memory graph", laddr))
+		trees: make(map[btrfsprim.ObjID]*rebuiltTree),
 	}
-
-	ref, err := btrfstree.ReadNode(ts.rawFile, ts.sb, laddr, btrfstree.NodeExpectations{
-		LAddr:      containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: laddr},
-		Level:      containers.Optional[uint8]{OK: true, Val: graphInfo.Level},
-		Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: graphInfo.Generation},
-		Owner: func(treeID btrfsprim.ObjID) error {
-			if treeID != graphInfo.Owner {
-				return fmt.Errorf("expected owner=%v but claims to have owner=%v",
-					graphInfo.Owner, treeID)
-			}
-			return nil
-		},
-		MinItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MinItem},
-		MaxItem: containers.Optional[btrfsprim.Key]{OK: true, Val: graphInfo.MaxItem},
-	})
-	if err != nil {
-		panic(fmt.Errorf("should not happen: i/o error: %w", err))
-	}
-
-	ts.nodeCache.Add(laddr, ref)
-
-	return ref
 }
 
 type rootStats struct {
@@ -198,14 +158,14 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 		if !roots.Has(rootNode) {
 			continue
 		}
-		for j, item := range ts.readNode(leaf).Data.BodyLeaf {
+		for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf {
 			if _, exists := tree.Items.Load(item.Key); exists {
 				// This is a panic because I'm not really sure what the best way to
 				// handle this is, and so if this happens I want the program to crash
 				// and force me to figure out how to handle it.
 				panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID))
 			}
-			tree.Items.Store(item.Key, itemPtr{
+			tree.Items.Store(item.Key, keyio.ItemPtr{
 				Node: leaf,
 				Idx:  j,
 			})
@@ -361,7 +321,7 @@ func (ts *RebuiltTrees) Load(ctx context.Context, treeID btrfsprim.ObjID, key bt
 	if !ok {
 		return nil, false
 	}
-	return ts.readNode(ptr.Node).Data.BodyLeaf[ptr.Idx].Body, true
+	return ts.keyIO.ReadItem(ptr)
 }
 
 // Search searches for an item from a tree.
@@ -372,7 +332,7 @@ func (ts *RebuiltTrees) Search(ctx context.Context, treeID btrfsprim.ObjID, fn f
 	if !ts.AddTree(ctx, treeID) {
 		return btrfsprim.Key{}, false
 	}
-	k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ itemPtr) int {
+	k, _, ok := ts.trees[treeID].Items.Search(func(k btrfsprim.Key, _ keyio.ItemPtr) int {
 		return fn(k)
 	})
 	return k, ok
@@ -386,7 +346,7 @@ func (ts *RebuiltTrees) SearchAll(ctx context.Context, treeID btrfsprim.ObjID, f
 	if !ts.AddTree(ctx, treeID) {
 		return nil
 	}
-	kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ itemPtr) int {
+	kvs := ts.trees[treeID].Items.SearchAll(func(k btrfsprim.Key, _ keyio.ItemPtr) int {
 		return fn(k)
 	})
 	if len(kvs) == 0 {
-- 
cgit v1.2.3-2-g168b


From 7620e1948a4d1e17458d8cfc9ed306bb774a3274 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Wed, 21 Dec 2022 04:30:56 -0700
Subject: rebuildnodes: Migrate to the new rebuilt-btrees system

---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 613f3ca..23e974e 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -51,6 +51,21 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
 	}
 }
 
+// cowDistance returns how many COW-snapshots down the 'tree' is from
+// the 'parent'.
+func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok bool) {
+	for {
+		if parentID == tree.ID {
+			return dist, true
+		}
+		if tree.Parent == nil {
+			return 0, false
+		}
+		tree = tree.Parent
+		dist++
+	}
+}
+
 // RebuiltTrees is an abstraction for rebuilding and accessing
 // potentially broken btrees.
 //
@@ -70,12 +85,15 @@ func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
 //
 //   - it does not keep track of errors encountered in a tree
 //
-// Additionally, it provides a piece of functionality that
+// Additionally, it provides some functionality that
 // btrfsutil.BrokenTrees does not:
 //
 //   - it provides a .LeafToRoots() method to advise on what
 //     additional roots should be added
 //
+//   - it provides a .COWDistance() method to compare how related two
+//     trees are
+//
 // A zero RebuiltTrees is invalid; it must be initialized with
 // NewRebuiltTrees().
 type RebuiltTrees struct {
@@ -380,6 +398,15 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID,
 	return ret
 }
 
+// COWDistance returns how many COW-snapshots down from the 'child'
+// tree is from the 'parent' tree.
+//
+// It is invalid (panic) to call COWDistance for a tree without having
+// called AddTree for the child first.
+func (ts *RebuiltTrees) COWDistance(ctx context.Context, childID, parentID btrfsprim.ObjID) (dist int, ok bool) {
+	return ts.trees[childID].cowDistance(parentID)
+}
+
 // ListRoots returns a listing of all initialized trees and their root
 // nodes.
 //
-- 
cgit v1.2.3-2-g168b


From ec31481b46fb772dfe2e977ef37fc0ace39fad8a Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Thu, 22 Dec 2022 16:30:27 -0700
Subject: rebuildnodes/btrees: Try to handle duplicate keys by generation

---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 56 ++++++++++++++--------
 1 file changed, 37 insertions(+), 19 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 23e974e..cbe7df1 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -136,17 +136,18 @@ type rootStats struct {
 	TreeID   btrfsprim.ObjID
 	RootNode btrfsvol.LogicalAddr
 
-	DoneLeafs  int
-	TotalLeafs int
-	AddedItems int
+	DoneLeafs     int
+	TotalLeafs    int
+	AddedItems    int
+	ReplacedItems int
 }
 
 func (s rootStats) String() string {
-	return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items)",
+	return fmt.Sprintf("tree %v: adding root node@%v: %v%% (%v/%v) (added %v items, replaced %v items)",
 		s.TreeID, s.RootNode,
 		int(100*float64(s.DoneLeafs)/float64(s.TotalLeafs)),
 		s.DoneLeafs, s.TotalLeafs,
-		s.AddedItems)
+		s.AddedItems, s.ReplacedItems)
 }
 
 // AddRoot adds an additional root node to an existing tree.  It is
@@ -161,13 +162,15 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 
 	progressWriter := textui.NewProgress[rootStats](ctx, dlog.LogLevelInfo, 1*time.Second)
 	numAdded := 0
+	numReplaced := 0
 	progress := func(done int) {
 		progressWriter.Set(rootStats{
-			TreeID:     treeID,
-			RootNode:   rootNode,
-			DoneLeafs:  done,
-			TotalLeafs: len(tree.leafToRoots),
-			AddedItems: numAdded,
+			TreeID:        treeID,
+			RootNode:      rootNode,
+			DoneLeafs:     done,
+			TotalLeafs:    len(tree.leafToRoots),
+			AddedItems:    numAdded,
+			ReplacedItems: numReplaced,
 		})
 	}
 	for i, leaf := range maps.SortedKeys(tree.leafToRoots) {
@@ -177,17 +180,32 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 			continue
 		}
 		for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf {
-			if _, exists := tree.Items.Load(item.Key); exists {
-				// This is a panic because I'm not really sure what the best way to
-				// handle this is, and so if this happens I want the program to crash
-				// and force me to figure out how to handle it.
-				panic(fmt.Errorf("dup key=%v in tree=%v", item.Key, treeID))
-			}
-			tree.Items.Store(item.Key, keyio.ItemPtr{
+			newPtr := keyio.ItemPtr{
 				Node: leaf,
 				Idx:  j,
-			})
-			numAdded++
+			}
+			if oldPtr, exists := tree.Items.Load(item.Key); !exists {
+				tree.Items.Store(item.Key, newPtr)
+				numAdded++
+			} else {
+				oldGen := ts.graph.Nodes[oldPtr.Node].Generation
+				newGen := ts.graph.Nodes[newPtr.Node].Generation
+				switch {
+				case oldGen < newGen:
+					tree.Items.Store(item.Key, newPtr)
+					numReplaced++
+				case oldGen > newGen:
+					// keep the old one
+				case oldGen == newGen:
+					// This is a panic because I'm not really sure what the best way to
+					// handle this is, and so if this happens I want the program to crash
+					// and force me to figure out how to handle it.
+					panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v",
+						item.Key, treeID,
+						oldPtr, ts.graph.Nodes[oldPtr.Node],
+						newPtr, ts.graph.Nodes[newPtr.Node]))
+				}
+			}
 			ts.cbAddedItem(ctx, treeID, item.Key)
 			progress(i)
 		}
-- 
cgit v1.2.3-2-g168b


From 3e22a30a7febd9890163966d01dd2b34656c9e39 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Thu, 22 Dec 2022 18:24:03 -0700
Subject: rebuildnodes: Handle the same node being added twice

---
 .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go   | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index cbe7df1..a4ccd5e 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -34,6 +34,7 @@ type rebuiltTree struct {
 
 	// mutable
 	Roots containers.Set[btrfsvol.LogicalAddr]
+	Leafs containers.Set[btrfsvol.LogicalAddr]
 	Items containers.SortedMap[btrfsprim.Key, keyio.ItemPtr]
 }
 
@@ -175,10 +176,10 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 	}
 	for i, leaf := range maps.SortedKeys(tree.leafToRoots) {
 		progress(i)
-		roots := tree.leafToRoots[leaf]
-		if !roots.Has(rootNode) {
+		if tree.Leafs.Has(leaf) || !tree.leafToRoots[leaf].Has(rootNode) {
 			continue
 		}
+		tree.Leafs.Insert(leaf)
 		for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf {
 			newPtr := keyio.ItemPtr{
 				Node: leaf,
@@ -234,6 +235,7 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta
 	tree := &rebuiltTree{
 		ID:    treeID,
 		Roots: make(containers.Set[btrfsvol.LogicalAddr]),
+		Leafs: make(containers.Set[btrfsvol.LogicalAddr]),
 	}
 	var root btrfsvol.LogicalAddr
 	switch treeID {
@@ -404,11 +406,17 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID,
 	if !ts.AddTree(ctx, treeID) {
 		return nil
 	}
+	if ts.graph.Nodes[leaf].Level != 0 {
+		panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): not a leaf",
+			treeID, leaf))
+	}
 	ret := make(containers.Set[btrfsvol.LogicalAddr])
 	for root := range ts.trees[treeID].leafToRoots[leaf] {
-		if !ts.trees[treeID].Roots.Has(root) {
-			ret.Insert(root)
+		if ts.trees[treeID].Roots.Has(root) {
+			panic(fmt.Errorf("should not happen: NodeToRoots(tree=%v, leaf=%v): tree contains root=%v but not leaf",
+				treeID, leaf, root))
 		}
+		ret.Insert(root)
 	}
 	if len(ret) == 0 {
 		return nil
-- 
cgit v1.2.3-2-g168b


From fc6f8871c39d3a74153e4ce8fd440c256f65b650 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 23 Dec 2022 16:03:27 -0700
Subject: rebuildnodes/btrees: Track the tree's root item offset

---
 .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go     | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index a4ccd5e..3a6c35b 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -25,9 +25,10 @@ import (
 
 type rebuiltTree struct {
 	// static
-	ID     btrfsprim.ObjID
-	UUID   btrfsprim.UUID
-	Parent *rebuiltTree
+	ID        btrfsprim.ObjID
+	UUID      btrfsprim.UUID
+	Parent    *rebuiltTree
+	ParentGen btrfsprim.Generation // offset of this tree's root item
 
 	// all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree
 	leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]
@@ -105,7 +106,7 @@ type RebuiltTrees struct {
 
 	// static callbacks
 	cbAddedItem  func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
-	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool)
+	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool)
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool)
 
 	// mutable
@@ -117,7 +118,7 @@ type RebuiltTrees struct {
 func NewRebuiltTrees(
 	sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle,
 	cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key),
-	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (item btrfsitem.Root, ok bool),
+	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool),
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool),
 ) *RebuiltTrees {
 	return &RebuiltTrees{
@@ -252,13 +253,14 @@ func (ts *RebuiltTrees) addTree(ctx context.Context, treeID btrfsprim.ObjID, sta
 		if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) {
 			return false
 		}
-		rootItem, ok := ts.cbLookupRoot(ctx, treeID)
+		rootOff, rootItem, ok := ts.cbLookupRoot(ctx, treeID)
 		if !ok {
 			return false
 		}
 		root = rootItem.ByteNr
 		tree.UUID = rootItem.UUID
 		if rootItem.ParentUUID != (btrfsprim.UUID{}) {
+			tree.ParentGen = rootOff
 			if !ts.addTree(ctx, btrfsprim.ROOT_TREE_OBJECTID, stack) {
 				return false
 			}
-- 
cgit v1.2.3-2-g168b


From 51d6ccf1e5af4d88bfe6842b06fa1bc2d5cc732c Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 23 Dec 2022 17:10:47 -0700
Subject: rebuildnodes: Have keyio.Handle always be a pointer, fuss with
 ScanDevices return type

---
 lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 3a6c35b..1d5ba14 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -102,7 +102,7 @@ type RebuiltTrees struct {
 	// static
 	sb    btrfstree.Superblock
 	graph pkggraph.Graph
-	keyIO keyio.Handle
+	keyIO *keyio.Handle
 
 	// static callbacks
 	cbAddedItem  func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key)
@@ -116,7 +116,7 @@ type RebuiltTrees struct {
 // NewRebuiltTrees returns a new RebuiltTrees instance.  All of the
 // callbacks must be non-nil.
 func NewRebuiltTrees(
-	sb btrfstree.Superblock, graph pkggraph.Graph, keyIO keyio.Handle,
+	sb btrfstree.Superblock, graph pkggraph.Graph, keyIO *keyio.Handle,
 	cbAddedItem func(ctx context.Context, tree btrfsprim.ObjID, key btrfsprim.Key),
 	cbLookupRoot func(ctx context.Context, tree btrfsprim.ObjID) (offset btrfsprim.Generation, item btrfsitem.Root, ok bool),
 	cbLookupUUID func(ctx context.Context, uuid btrfsprim.UUID) (id btrfsprim.ObjID, ok bool),
-- 
cgit v1.2.3-2-g168b


From 8fc3c78924da1dedd3adce3b923adf58e5ca732a Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 23 Dec 2022 17:50:46 -0700
Subject: rebuildnodes: Have the graph store keys; avoid I/O when indexing a
 tree

---
 .../btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go       | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index 1d5ba14..f919b37 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -181,20 +181,20 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 			continue
 		}
 		tree.Leafs.Insert(leaf)
-		for j, item := range ts.keyIO.ReadNode(leaf).Data.BodyLeaf {
+		for j, itemKey := range ts.graph.Nodes[leaf].Items {
 			newPtr := keyio.ItemPtr{
 				Node: leaf,
 				Idx:  j,
 			}
-			if oldPtr, exists := tree.Items.Load(item.Key); !exists {
-				tree.Items.Store(item.Key, newPtr)
+			if oldPtr, exists := tree.Items.Load(itemKey); !exists {
+				tree.Items.Store(itemKey, newPtr)
 				numAdded++
 			} else {
 				oldGen := ts.graph.Nodes[oldPtr.Node].Generation
 				newGen := ts.graph.Nodes[newPtr.Node].Generation
 				switch {
 				case oldGen < newGen:
-					tree.Items.Store(item.Key, newPtr)
+					tree.Items.Store(itemKey, newPtr)
 					numReplaced++
 				case oldGen > newGen:
 					// keep the old one
@@ -203,12 +203,12 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 					// handle this is, and so if this happens I want the program to crash
 					// and force me to figure out how to handle it.
 					panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v",
-						item.Key, treeID,
+						itemKey, treeID,
 						oldPtr, ts.graph.Nodes[oldPtr.Node],
 						newPtr, ts.graph.Nodes[newPtr.Node]))
 				}
 			}
-			ts.cbAddedItem(ctx, treeID, item.Key)
+			ts.cbAddedItem(ctx, treeID, itemKey)
 			progress(i)
 		}
 	}
-- 
cgit v1.2.3-2-g168b


From 6330b26a9f9c7769c48e2bc90e065457f8e138ab Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 23 Dec 2022 16:48:42 -0700
Subject: rebuildnodes: Have the key index belong to the btree, and be smarter

---
 .../rebuildnodes/btrees/rebuilt_btrees.go          | 106 ++++++++++++++-------
 1 file changed, 74 insertions(+), 32 deletions(-)

(limited to 'lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees')

diff --git a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
index f919b37..50c75a4 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildnodes/btrees/rebuilt_btrees.go
@@ -32,6 +32,7 @@ type rebuiltTree struct {
 
 	// all leafs (lvl=0) that pass .isOwnerOK, even if not in the tree
 	leafToRoots map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]
+	keys        containers.SortedMap[btrfsprim.Key, keyio.ItemPtr]
 
 	// mutable
 	Roots containers.Set[btrfsvol.LogicalAddr]
@@ -41,14 +42,14 @@ type rebuiltTree struct {
 
 // isOwnerOK returns whether it is permissible for a node with
 // .Head.Owner=owner to be in this tree.
-func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID) bool {
+func (tree *rebuiltTree) isOwnerOK(owner btrfsprim.ObjID, gen btrfsprim.Generation) bool {
 	for {
-		if tree == nil {
-			return false
-		}
 		if owner == tree.ID {
 			return true
 		}
+		if tree.Parent == nil || gen >= tree.ParentGen {
+			return false
+		}
 		tree = tree.Parent
 	}
 }
@@ -68,6 +69,38 @@ func (tree *rebuiltTree) cowDistance(parentID btrfsprim.ObjID) (dist int, ok boo
 	}
 }
 
+func (tree *rebuiltTree) shouldReplace(graph pkggraph.Graph, oldNode, newNode btrfsvol.LogicalAddr) bool {
+	oldDist, _ := tree.cowDistance(graph.Nodes[oldNode].Owner)
+	newDist, _ := tree.cowDistance(graph.Nodes[newNode].Owner)
+	switch {
+	case newDist < oldDist:
+		// Replace the old one with the new lower-dist one.
+		return true
+	case newDist > oldDist:
+		// Retain the old lower-dist one.
+		return false
+	default:
+		oldGen := graph.Nodes[oldNode].Generation
+		newGen := graph.Nodes[newNode].Generation
+		switch {
+		case newGen > oldGen:
+			// Replace the old one with the new higher-gen one.
+			return true
+		case newGen < oldGen:
+			// Retain the old higher-gen one.
+			return false
+		default:
+			// This is a panic because I'm not really sure what the best way to
+			// handle this is, and so if this happens I want the program to crash
+			// and force me to figure out how to handle it.
+			panic(fmt.Errorf("dup nodes in tree=%v: old=%v=%v ; new=%v=%v",
+				tree.ID,
+				oldNode, graph.Nodes[oldNode],
+				newNode, graph.Nodes[newNode]))
+		}
+	}
+}
+
 // RebuiltTrees is an abstraction for rebuilding and accessing
 // potentially broken btrees.
 //
@@ -189,24 +222,9 @@ func (ts *RebuiltTrees) AddRoot(ctx context.Context, treeID btrfsprim.ObjID, roo
 			if oldPtr, exists := tree.Items.Load(itemKey); !exists {
 				tree.Items.Store(itemKey, newPtr)
 				numAdded++
-			} else {
-				oldGen := ts.graph.Nodes[oldPtr.Node].Generation
-				newGen := ts.graph.Nodes[newPtr.Node].Generation
-				switch {
-				case oldGen < newGen:
-					tree.Items.Store(itemKey, newPtr)
-					numReplaced++
-				case oldGen > newGen:
-					// keep the old one
-				case oldGen == newGen:
-					// This is a panic because I'm not really sure what the best way to
-					// handle this is, and so if this happens I want the program to crash
-					// and force me to figure out how to handle it.
-					panic(fmt.Errorf("dup key=%v in tree=%v: old=%v=%#v ; new=%v=%#v",
-						itemKey, treeID,
-						oldPtr, ts.graph.Nodes[oldPtr.Node],
-						newPtr, ts.graph.Nodes[newPtr.Node]))
-				}
+			} else if tree.shouldReplace(ts.graph, oldPtr.Node, newPtr.Node) {
+				tree.Items.Store(itemKey, newPtr)
+				numReplaced++
 			}
 			ts.cbAddedItem(ctx, treeID, itemKey)
 			progress(i)
@@ -327,26 +345,42 @@ func (tree *rebuiltTree) indexNode(graph pkggraph.Graph, node btrfsvol.LogicalAd
 		return
 	}
 	if slices.Contains(node, stack) {
-		return
+		panic("loop")
 	}
-	if !tree.isOwnerOK(graph.Nodes[node].Owner) {
+	if !tree.isOwnerOK(graph.Nodes[node].Owner, graph.Nodes[node].Generation) {
 		index[node] = nil
 		return
 	}
+
+	// tree.leafToRoots
+	stack = append(stack, node)
+	var roots containers.Set[btrfsvol.LogicalAddr]
 	kps := slices.RemoveAllFunc(graph.EdgesTo[node], func(kp *pkggraph.Edge) bool {
-		return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner)
+		return !tree.isOwnerOK(graph.Nodes[kp.FromNode].Owner, graph.Nodes[kp.FromNode].Generation)
 	})
-	if len(kps) == 0 {
-		index[node] = containers.NewSet[btrfsvol.LogicalAddr](node)
-		return
-	}
-	stack = append(stack, node)
-	roots := make(containers.Set[btrfsvol.LogicalAddr])
 	for _, kp := range kps {
 		tree.indexNode(graph, kp.FromNode, index, progress, stack)
-		roots.InsertFrom(index[kp.FromNode])
+		if len(index[kp.FromNode]) > 0 {
+			if roots == nil {
+				roots = make(containers.Set[btrfsvol.LogicalAddr])
+			}
+			roots.InsertFrom(index[kp.FromNode])
+		}
+	}
+	if roots == nil {
+		roots = containers.NewSet[btrfsvol.LogicalAddr](node)
 	}
 	index[node] = roots
+
+	// tree.keys
+	for i, key := range graph.Nodes[node].Items {
+		if oldPtr, ok := tree.keys.Load(key); !ok || tree.shouldReplace(graph, oldPtr.Node, node) {
+			tree.keys.Store(key, keyio.ItemPtr{
+				Node: node,
+				Idx:  i,
+			})
+		}
+	}
 }
 
 // Load reads an item from a tree.
@@ -426,6 +460,14 @@ func (ts *RebuiltTrees) LeafToRoots(ctx context.Context, treeID btrfsprim.ObjID,
 	return ret
 }
 
+// Keys returns a map of all keys in node that would be valid in this tree.
+//
+// It is invalid (panic) to call Keys for a tree without having called
+// AddTree first.
+func (ts *RebuiltTrees) Keys(treeID btrfsprim.ObjID) *containers.SortedMap[btrfsprim.Key, keyio.ItemPtr] {
+	return &ts.trees[treeID].keys
+}
+
 // COWDistance returns how many COW-snapshots down from the 'child'
 // tree is from the 'parent' tree.
 //
-- 
cgit v1.2.3-2-g168b