summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go14
1 files changed, 14 insertions, 0 deletions
diff --git a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go
index 9807764..7311aca 100644
--- a/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go
+++ b/lib/btrfsprogs/btrfsinspect/rebuildmappings/rebuildmappings.go
@@ -147,6 +147,20 @@ func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect
}
dlog.Info(_ctx, "... done processing block groups")
+ // More than once, I've been tempted to get rid of this exact-search step and just have the fuzzy-search step.
+ // After all, looking at the timestamps in the log, it's faster per blockgroup! For some background, the big-O
+ // for each (per blockgroup) looks like:
+ //
+ // - exact-search: O(bgSize+physicalBlocks)
+ // - fuzzy-search: O(bgSize*physicalBlocks) worst-case; O(bgSize*log(physicalBlocks)) expected
+ //
+ // The fuzzy-search is only fast because the exact-search is so good at getting `physicalBlocks` down.
+ // Empirically: if I remove the exact-search step, then the fuzzy-match step is more than an order of magnitude
+ // slower.
+ //
+ // The exact-search probably could be optimized to be substantially faster (by a constant factor; not affecting
+ // the big-O) by figuring out how to inline function calls and get reduce allocations, but IMO it's "fast
+ // enough" for now.
ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "5/6")
dlog.Infof(_ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs))
physicalSums := ExtractPhysicalSums(scanResults)