summaryrefslogtreecommitdiff
path: root/cmd/btrfs-rec/inspect/rebuildmappings
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-02-28 14:05:27 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-03-14 19:45:07 -0600
commit8c8c6c27552f8554ba014c34d684cb90538ef65b (patch)
treef3a53ed194e29b516b52770e4949a1e508fad6a7 /cmd/btrfs-rec/inspect/rebuildmappings
parent34bf167ef33c57b4d6767273f1d265971a4693b9 (diff)
Move files around [ci-skip]
Diffstat (limited to 'cmd/btrfs-rec/inspect/rebuildmappings')
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/kmp.go113
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go126
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process.go222
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go58
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go78
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go159
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go249
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go89
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/scan.go260
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go167
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d3
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e6003
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd603
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e419537383
-rw-r--r--cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d3
15 files changed, 1536 insertions, 0 deletions
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go b/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go
new file mode 100644
index 0000000..20772ba
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go
@@ -0,0 +1,113 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "errors"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+)
+
+type kmpPattern[K ~int64 | ~int, V comparable] interface {
+ PatLen() K
+ // Get the value at 'pos' in the sequence. Positions start at
+ // 0 and increment naturally. It is invalid to call Get(pos)
+ // with a pos that is >= Len(). If there is a gap/wildcard at
+ // pos, ok is false.
+ PatGet(pos K) (v V, ok bool)
+}
+
+func kmpSelfEq[K ~int64 | ~int, V comparable](s kmpPattern[K, V], aI K, bI K) bool {
+ aV, aOK := s.PatGet(aI)
+ bV, bOK := s.PatGet(bI)
+ if !aOK || !bOK {
+ return true
+ }
+ return aV == bV
+}
+
+// buildKMPTable takes the string 'substr', and returns a table such
+// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and
+// 'substr[:val] == substr[matchLen-val:matchLen]'.
+func buildKMPTable[K ~int64 | ~int, V comparable](substr kmpPattern[K, V]) []K {
+ substrLen := substr.PatLen()
+
+ table := make([]K, substrLen)
+ for j := K(0); j < substrLen; j++ {
+ if j == 0 {
+ // First entry must always be 0 (in order to
+ // satisfy 'val < matchLen').
+ continue
+ }
+ val := table[j-1]
+ // not a match; go back
+ for val > 0 && !kmpSelfEq(substr, j, val) {
+ val = table[val-1]
+ }
+ // is a match; go forward
+ if kmpSelfEq(substr, val, j) {
+ val++
+ }
+ table[j] = val
+ }
+ return table
+}
+
+func kmpEq[K ~int64 | ~int, V comparable](aV V, bS kmpPattern[K, V], bI K) bool {
+ bV, ok := bS.PatGet(bI)
+ if !ok {
+ return true
+ }
+ return aV == bV
+}
+
+// indexAll returns the starting-position of all possibly-overlapping
+// occurrences of 'substr' in the 'str' sequence.
+//
+// Will hop around in 'substr', but will only get the natural sequence
+// [0...) in order from 'str'.
+//
+// Will panic if the length of 'substr' is 0.
+//
+// The 'substr' may include wildcard characters by returning
+// ErrWildcard for a position.
+//
+// Uses the Knuth-Morris-Pratt algorithm.
+func indexAll[K ~int64 | ~int, V comparable](str diskio.Sequence[K, V], substr kmpPattern[K, V]) []K {
+ table := buildKMPTable(substr)
+ substrLen := K(len(table))
+ if substrLen == 0 {
+ panic(errors.New("rebuildmappings.IndexAll: empty substring"))
+ }
+
+ var matches []K
+ var curMatchBeg K
+ var curMatchLen K
+
+ strLen := str.SeqLen()
+ for pos := K(0); pos < strLen; pos++ {
+ chr := str.SeqGet(pos)
+
+ // Consider 'chr'
+ for curMatchLen > 0 && !kmpEq(chr, substr, curMatchLen) { // shorten the match
+ overlap := table[curMatchLen-1]
+ curMatchBeg += curMatchLen - overlap
+ curMatchLen = overlap
+ }
+ if kmpEq(chr, substr, curMatchLen) { // lengthen the match
+ if curMatchLen == 0 {
+ curMatchBeg = pos
+ }
+ curMatchLen++
+ if curMatchLen == substrLen {
+ matches = append(matches, curMatchBeg)
+ overlap := table[curMatchLen-1]
+ curMatchBeg += curMatchLen - overlap
+ curMatchLen = overlap
+ }
+ }
+ }
+ return matches
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go b/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go
new file mode 100644
index 0000000..acec9b8
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go
@@ -0,0 +1,126 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/diskio"
+)
+
+type bytePattern[K ~int64 | ~int] []byte
+
+var _ kmpPattern[int, byte] = bytePattern[int]{}
+
+// PatLen implements kmpPattern.
+func (s bytePattern[K]) PatLen() K {
+ return K(len(s))
+}
+
+// PatGet implements kmpPattern.
+func (s bytePattern[K]) PatGet(i K) (byte, bool) {
+ chr := s[int(i)]
+ if chr == '.' {
+ return 0, false
+ }
+ return chr, true
+}
+
+func TestBuildKMPTable(t *testing.T) {
+ t.Parallel()
+ substr := bytePattern[int64]([]byte("ababaa"))
+ table := buildKMPTable[int64, byte](substr)
+ require.Equal(t,
+ []int64{0, 0, 1, 2, 3, 1},
+ table)
+ for j, val := range table {
+ matchLen := j + 1
+ assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen],
+ "for table[%d]=%d", j, val)
+ }
+}
+
+func FuzzBuildKMPTable(f *testing.F) {
+ f.Add([]byte("ababaa"))
+ f.Fuzz(func(t *testing.T, substr []byte) {
+ table := buildKMPTable[int64, byte](bytePattern[int64](substr))
+ require.Equal(t, len(substr), len(table), "length")
+ for j, val := range table {
+ matchLen := j + 1
+ assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen],
+ "for table[%d]=%d", j, val)
+ }
+ })
+}
+
+func NaiveIndexAll(str, substr []byte) []int64 {
+ var matches []int64
+ for i := range str {
+ if bytes.HasPrefix(str[i:], substr) {
+ matches = append(matches, int64(i))
+ }
+ }
+ return matches
+}
+
+func FuzzIndexAll(f *testing.F) {
+ f.Fuzz(func(t *testing.T, str, substr []byte) {
+ if len(substr) == 0 {
+ t.Skip()
+ }
+ t.Logf("str =%q", str)
+ t.Logf("substr=%q", substr)
+ exp := NaiveIndexAll(str, substr)
+ act := indexAll[int64, byte](
+ diskio.SliceSequence[int64, byte](str),
+ bytePattern[int64](substr))
+ assert.Equal(t, exp, act)
+ })
+}
+
+func TestKMPWildcard(t *testing.T) {
+ t.Parallel()
+ type testcase struct {
+ InStr string
+ InSubstr string
+ ExpMatches []int64
+ }
+ testcases := map[string]testcase{
+ "trivial-bar": {
+ InStr: "foo_bar",
+ InSubstr: "foo.ba.",
+ ExpMatches: []int64{0},
+ },
+ "trival-baz": {
+ InStr: "foo-baz",
+ InSubstr: "foo.ba.",
+ ExpMatches: []int64{0},
+ },
+ "suffix": {
+ InStr: "foobarbaz",
+ InSubstr: "...baz",
+ ExpMatches: []int64{3},
+ },
+ "overlap": {
+ InStr: "foobarbar",
+ InSubstr: "...bar",
+ ExpMatches: []int64{0, 3},
+ },
+ }
+ for tcName, tc := range testcases {
+ tc := tc
+ t.Run(tcName, func(t *testing.T) {
+ t.Parallel()
+ matches := indexAll[int64, byte](
+ diskio.StringSequence[int64](tc.InStr),
+ bytePattern[int64](tc.InSubstr))
+ assert.Equal(t, tc.ExpMatches, matches)
+ })
+ }
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process.go b/cmd/btrfs-rec/inspect/rebuildmappings/process.go
new file mode 100644
index 0000000..cdf5e5a
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process.go
@@ -0,0 +1,222 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+ "fmt"
+
+ "github.com/datawire/dlib/dlog"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/maps"
+ "git.lukeshu.com/btrfs-progs-ng/lib/textui"
+)
+
+func getNodeSize(fs *btrfs.FS) (btrfsvol.AddrDelta, error) {
+ sb, err := fs.Superblock()
+ if err != nil {
+ return 0, err
+ }
+ return btrfsvol.AddrDelta(sb.NodeSize), nil
+}
+
+func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.ScanDevicesResult) error {
+ nodeSize, err := getNodeSize(fs)
+ if err != nil {
+ return err
+ }
+
+ var numChunks, numDevExts, numBlockGroups, numNodes int
+ devIDs := maps.SortedKeys(scanResults)
+ devices := fs.LV.PhysicalVolumes()
+ for _, devID := range devIDs {
+ if _, ok := devices[devID]; !ok {
+ return fmt.Errorf("device ID %v mentioned in scan results is not part of the filesystem", devID)
+ }
+ devResults := scanResults[devID]
+ numChunks += len(devResults.FoundChunks)
+ numDevExts += len(devResults.FoundDevExtents)
+ numBlockGroups += len(devResults.FoundBlockGroups)
+ for _, paddrs := range devResults.FoundNodes {
+ numNodes += len(paddrs)
+ }
+ }
+ dlog.Infof(ctx, "plan: 1/6 process %d chunks", numChunks)
+ dlog.Infof(ctx, "plan: 2/6 process %d device extents", numDevExts)
+ dlog.Infof(ctx, "plan: 3/6 process %d nodes", numNodes)
+ dlog.Infof(ctx, "plan: 4/6 process %d block groups", numBlockGroups)
+ dlog.Infof(ctx, "plan: 5/6 search for block groups in checksum map (exact)")
+ dlog.Infof(ctx, "plan: 6/6 search for block groups in checksum map (fuzzy)")
+
+ _ctx := ctx
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "1/6")
+ dlog.Infof(_ctx, "1/6: Processing %d chunks...", numChunks)
+ for _, devID := range devIDs {
+ devResults := scanResults[devID]
+ for _, chunk := range devResults.FoundChunks {
+ for _, mapping := range chunk.Chunk.Mappings(chunk.Key) {
+ if err := fs.LV.AddMapping(mapping); err != nil {
+ dlog.Errorf(ctx, "error: adding chunk: %v", err)
+ }
+ }
+ }
+ }
+ dlog.Info(_ctx, "... done processing chunks")
+
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "2/6")
+ dlog.Infof(_ctx, "2/6: Processing %d device extents...", numDevExts)
+ for _, devID := range devIDs {
+ devResults := scanResults[devID]
+ for _, ext := range devResults.FoundDevExtents {
+ if err := fs.LV.AddMapping(ext.DevExt.Mapping(ext.Key)); err != nil {
+ dlog.Errorf(ctx, "error: adding devext: %v", err)
+ }
+ }
+ }
+ dlog.Info(_ctx, "... done processing device extents")
+
+ // Do the nodes "last" to avoid bloating the mappings table
+ // too much. (Because nodes are numerous and small, while the
+ // others are few and large; so it is likely that many of the
+ // nodes will be subsumed by other things.)
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "3/6")
+ dlog.Infof(_ctx, "3/6: Processing %d nodes...", numNodes)
+ for _, devID := range devIDs {
+ devResults := scanResults[devID]
+ // Sort them so that progress numbers are predictable.
+ for _, laddr := range maps.SortedKeys(devResults.FoundNodes) {
+ for _, paddr := range devResults.FoundNodes[laddr] {
+ if err := fs.LV.AddMapping(btrfsvol.Mapping{
+ LAddr: laddr,
+ PAddr: btrfsvol.QualifiedPhysicalAddr{
+ Dev: devID,
+ Addr: paddr,
+ },
+ Size: nodeSize,
+ SizeLocked: false,
+ }); err != nil {
+ dlog.Errorf(ctx, "error: adding node ident: %v", err)
+ }
+ }
+ }
+ }
+ dlog.Info(_ctx, "... done processing nodes")
+
+ // Use block groups to add missing flags (and as a hint to
+ // combine node entries).
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "4/6")
+ dlog.Infof(_ctx, "4/6: Processing %d block groups...", numBlockGroups)
+ // First dedup them, because they change for allocations and
+ // CoW means that they'll bounce around a lot, so you likely
+ // have oodles of duplicates?
+ bgs, err := DedupBlockGroups(scanResults)
+ if err != nil {
+ return err
+ }
+ dlog.Infof(ctx, "... de-duplicated to %d block groups", len(bgs))
+ for _, bgLAddr := range maps.SortedKeys(bgs) {
+ bg := bgs[bgLAddr]
+ otherLAddr, otherPAddr := fs.LV.ResolveAny(bg.LAddr, bg.Size)
+ if otherLAddr < 0 || otherPAddr.Addr < 0 {
+ dlog.Errorf(ctx, "error: could not pair blockgroup laddr=%v (size=%v flags=%v) with a mapping",
+ bg.LAddr, bg.Size, bg.Flags)
+ continue
+ }
+
+ offsetWithinChunk := otherLAddr.Sub(bg.LAddr)
+ mapping := btrfsvol.Mapping{
+ LAddr: bg.LAddr,
+ PAddr: otherPAddr.Add(-offsetWithinChunk),
+ Size: bg.Size,
+ SizeLocked: true,
+ Flags: containers.Optional[btrfsvol.BlockGroupFlags]{
+ OK: true,
+ Val: bg.Flags,
+ },
+ }
+ if err := fs.LV.AddMapping(mapping); err != nil {
+ dlog.Errorf(ctx, "error: adding flags from blockgroup: %v", err)
+ continue
+ }
+ delete(bgs, bgLAddr)
+ }
+ dlog.Info(_ctx, "... done processing block groups")
+
+ // More than once, I've been tempted to get rid of this exact-search step and just have the fuzzy-search step.
+ // After all, looking at the timestamps in the log, it's faster per blockgroup! For some background, the big-O
+ // for each (per blockgroup) looks like:
+ //
+ // - exact-search: O(bgSize+physicalBlocks)
+ // - fuzzy-search: O(bgSize*physicalBlocks) worst-case; O(bgSize*log(physicalBlocks)) expected
+ //
+ // The fuzzy-search is only fast because the exact-search is so good at getting `physicalBlocks` down.
+ // Empirically: if I remove the exact-search step, then the fuzzy-match step is more than an order of magnitude
+ // slower.
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "5/6")
+ dlog.Infof(_ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs))
+ physicalSums := ExtractPhysicalSums(scanResults)
+ logicalSums := ExtractLogicalSums(ctx, scanResults)
+ if err := matchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil {
+ return err
+ }
+ dlog.Info(ctx, "... done searching for exact block groups")
+
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "6/6")
+ dlog.Infof(_ctx, "6/6: Searching for %d block groups in checksum map (fuzzy)...", len(bgs))
+ if err := fuzzyMatchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil {
+ return err
+ }
+ dlog.Info(_ctx, "... done searching for fuzzy block groups")
+
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "report")
+ dlog.Info(_ctx, "report:")
+
+ unmappedPhysicalRegions := ListUnmappedPhysicalRegions(fs)
+ var unmappedPhysical btrfsvol.AddrDelta
+ var numUnmappedPhysical int
+ for _, devRegions := range unmappedPhysicalRegions {
+ numUnmappedPhysical += len(devRegions)
+ for _, region := range devRegions {
+ unmappedPhysical += region.End.Sub(region.Beg)
+ }
+ }
+ dlog.Infof(ctx, "... %d of unmapped physical space (across %d regions)", textui.IEC(unmappedPhysical, "B"), numUnmappedPhysical)
+
+ unmappedLogicalRegions := ListUnmappedLogicalRegions(fs, logicalSums)
+ var unmappedLogical btrfsvol.AddrDelta
+ for _, region := range unmappedLogicalRegions {
+ unmappedLogical += region.Size()
+ }
+ dlog.Infof(ctx, "... %d of unmapped summed logical space (across %d regions)", textui.IEC(unmappedLogical, "B"), len(unmappedLogicalRegions))
+
+ var unmappedBlockGroups btrfsvol.AddrDelta
+ for _, bg := range bgs {
+ unmappedBlockGroups += bg.Size
+ }
+ dlog.Infof(ctx, "... %d of unmapped block groups (across %d groups)", textui.IEC(unmappedBlockGroups, "B"), len(bgs))
+
+ dlog.Info(_ctx, "detailed report:")
+ for _, devID := range maps.SortedKeys(unmappedPhysicalRegions) {
+ for _, region := range unmappedPhysicalRegions[devID] {
+ dlog.Infof(ctx, "... unmapped physical region: dev=%v beg=%v end=%v (size=%v)",
+ devID, region.Beg, region.End, region.End.Sub(region.Beg))
+ }
+ }
+ for _, region := range unmappedLogicalRegions {
+ dlog.Infof(ctx, "... umapped summed logical region: beg=%v end=%v (size=%v)",
+ region.Addr, region.Addr.Add(region.Size()), region.Size())
+ }
+ for _, laddr := range maps.SortedKeys(bgs) {
+ bg := bgs[laddr]
+ dlog.Infof(ctx, "... umapped block group: beg=%v end=%v (size=%v) flags=%v",
+ bg.LAddr, bg.LAddr.Add(bg.Size), bg.Size, bg.Flags)
+ }
+
+ return nil
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go
new file mode 100644
index 0000000..0e2d5a0
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go
@@ -0,0 +1,58 @@
+// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "fmt"
+ "sort"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/maps"
+)
+
+type BlockGroup struct {
+ LAddr btrfsvol.LogicalAddr
+ Size btrfsvol.AddrDelta
+ Flags btrfsvol.BlockGroupFlags
+}
+
+func DedupBlockGroups(scanResults btrfsinspect.ScanDevicesResult) (map[btrfsvol.LogicalAddr]BlockGroup, error) {
+ // Dedup
+ bgsSet := make(containers.Set[BlockGroup])
+ for _, devResults := range scanResults {
+ for _, bg := range devResults.FoundBlockGroups {
+ bgsSet.Insert(BlockGroup{
+ LAddr: btrfsvol.LogicalAddr(bg.Key.ObjectID),
+ Size: btrfsvol.AddrDelta(bg.Key.Offset),
+ Flags: bg.BG.Flags,
+ })
+ }
+ }
+
+ // Sort
+ bgsOrdered := maps.Keys(bgsSet)
+ sort.Slice(bgsOrdered, func(i, j int) bool {
+ return bgsOrdered[i].LAddr < bgsOrdered[j].LAddr
+ })
+
+ // Sanity check
+ var pos btrfsvol.LogicalAddr
+ for _, bg := range bgsOrdered {
+ if bg.LAddr < pos || bg.Size < 0 {
+ return nil, fmt.Errorf("found block groups are inconsistent")
+ }
+ pos = bg.LAddr.Add(bg.Size)
+ }
+
+ // Return. We return a map instead of a slice in order to
+ // facilitate easy deletes.
+ bgsMap := make(map[btrfsvol.LogicalAddr]BlockGroup, len(bgsSet))
+ for bg := range bgsSet {
+ bgsMap[bg.LAddr] = bg
+ }
+ return bgsMap, nil
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go
new file mode 100644
index 0000000..a3e724e
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go
@@ -0,0 +1,78 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+
+ "github.com/datawire/dlib/dlog"
+ "golang.org/x/text/number"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/maps"
+)
+
+func matchBlockGroupSums(ctx context.Context,
+ fs *btrfs.FS,
+ blockgroups map[btrfsvol.LogicalAddr]BlockGroup,
+ physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr],
+ logicalSums SumRunWithGaps[btrfsvol.LogicalAddr],
+) error {
+ regions := ListUnmappedPhysicalRegions(fs)
+ numBlockgroups := len(blockgroups)
+ for i, bgLAddr := range maps.SortedKeys(blockgroups) {
+ blockgroup := blockgroups[bgLAddr]
+ bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size)
+ if len(bgRun.Runs) == 0 {
+ dlog.Errorf(ctx, "(%v/%v) blockgroup[laddr=%v] can't be matched because it has 0 runs",
+ i+1, numBlockgroups, bgLAddr)
+ continue
+ }
+
+ var matches []btrfsvol.QualifiedPhysicalAddr
+ if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error {
+ rawMatches := indexAll[int, btrfssum.ShortSum](region, bgRun)
+ for _, match := range rawMatches {
+ matches = append(matches, btrfsvol.QualifiedPhysicalAddr{
+ Dev: devID,
+ Addr: region.Addr + (btrfsvol.PhysicalAddr(match) * btrfssum.BlockSize),
+ })
+ }
+ return nil
+ }); err != nil {
+ return err
+ }
+
+ lvl := dlog.LogLevelError
+ if len(matches) == 1 {
+ lvl = dlog.LogLevelInfo
+ }
+ dlog.Logf(ctx, lvl, "(%v/%v) blockgroup[laddr=%v] has %v matches based on %v coverage from %v runs",
+ i+1, numBlockgroups, bgLAddr, len(matches), number.Percent(bgRun.PctFull()), len(bgRun.Runs))
+ if len(matches) != 1 {
+ continue
+ }
+
+ mapping := btrfsvol.Mapping{
+ LAddr: blockgroup.LAddr,
+ PAddr: matches[0],
+ Size: blockgroup.Size,
+ SizeLocked: true,
+ Flags: containers.Optional[btrfsvol.BlockGroupFlags]{
+ OK: true,
+ Val: blockgroup.Flags,
+ },
+ }
+ if err := fs.LV.AddMapping(mapping); err != nil {
+ dlog.Errorf(ctx, "error: %v", err)
+ continue
+ }
+ delete(blockgroups, bgLAddr)
+ }
+ return nil
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go
new file mode 100644
index 0000000..4724c12
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go
@@ -0,0 +1,159 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+ "sort"
+
+ "github.com/datawire/dlib/dlog"
+ "golang.org/x/text/number"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/maps"
+ "git.lukeshu.com/btrfs-progs-ng/lib/textui"
+)
+
+var minFuzzyPct = textui.Tunable(0.5)
+
+type fuzzyRecord struct {
+ PAddr btrfsvol.QualifiedPhysicalAddr
+ N int
+}
+
+func (a fuzzyRecord) Compare(b fuzzyRecord) int {
+ switch {
+ case a.N < b.N:
+ return -1
+ case a.N > b.N:
+ return 1
+ default:
+ return 0
+ }
+}
+
+func fuzzyMatchBlockGroupSums(ctx context.Context,
+ fs *btrfs.FS,
+ blockgroups map[btrfsvol.LogicalAddr]BlockGroup,
+ physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr],
+ logicalSums SumRunWithGaps[btrfsvol.LogicalAddr],
+) error {
+ _ctx := ctx
+
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "indexing")
+ dlog.Info(ctx, "Indexing physical regions...") // O(m)
+ regions := ListUnmappedPhysicalRegions(fs)
+ physicalIndex := make(map[btrfssum.ShortSum][]btrfsvol.QualifiedPhysicalAddr)
+ if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error {
+ return region.Walk(ctx, func(paddr btrfsvol.PhysicalAddr, sum btrfssum.ShortSum) error {
+ physicalIndex[sum] = append(physicalIndex[sum], btrfsvol.QualifiedPhysicalAddr{
+ Dev: devID,
+ Addr: paddr,
+ })
+ return nil
+ })
+ }); err != nil {
+ return err
+ }
+ dlog.Info(ctx, "... done indexing")
+
+ ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "searching")
+ dlog.Info(ctx, "Searching...")
+ numBlockgroups := len(blockgroups)
+ for i, bgLAddr := range maps.SortedKeys(blockgroups) {
+ blockgroup := blockgroups[bgLAddr]
+ bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size)
+
+ d := bgRun.PatLen()
+ matches := make(map[btrfsvol.QualifiedPhysicalAddr]int)
+ if err := bgRun.Walk(ctx, func(laddr btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { // O(n*…
+ off := laddr.Sub(bgRun.Addr)
+ for _, paddr := range physicalIndex[sum] { // …log(m)) expected but …m) worst
+ key := btrfsvol.QualifiedPhysicalAddr{
+ Dev: paddr.Dev,
+ Addr: paddr.Addr.Add(-off),
+ }
+ matches[key]++
+ }
+ return nil
+ }); err != nil {
+ return err
+ }
+
+ best := lowestN[fuzzyRecord]{N: 2}
+ for paddr, n := range matches { // O(m)
+ best.Insert(fuzzyRecord{
+ PAddr: paddr,
+ N: d - n,
+ })
+ }
+
+ var apply bool
+ var matchesStr string
+ switch len(best.Dat) {
+ case 0: // can happen if there are no sums in the run
+ matchesStr = ""
+ case 1: // not sure how this can happen, but whatev
+ pct := float64(d-best.Dat[0].N) / float64(d)
+ matchesStr = textui.Sprintf("%v", number.Percent(pct))
+ apply = pct > minFuzzyPct
+ case 2:
+ pct := float64(d-best.Dat[0].N) / float64(d)
+ pct2 := float64(d-best.Dat[1].N) / float64(d)
+ matchesStr = textui.Sprintf("best=%v secondbest=%v", number.Percent(pct), number.Percent(pct2))
+ apply = pct > minFuzzyPct && pct2 < minFuzzyPct
+ }
+ lvl := dlog.LogLevelError
+ if apply {
+ lvl = dlog.LogLevelInfo
+ }
+ dlog.Logf(ctx, lvl, "(%v/%v) blockgroup[laddr=%v] matches=[%s]; bestpossible=%v (based on %v runs)",
+ i+1, numBlockgroups, bgLAddr, matchesStr, number.Percent(bgRun.PctFull()), len(bgRun.Runs))
+ if !apply {
+ continue
+ }
+
+ mapping := btrfsvol.Mapping{
+ LAddr: blockgroup.LAddr,
+ PAddr: best.Dat[0].PAddr,
+ Size: blockgroup.Size,
+ SizeLocked: true,
+ Flags: containers.Optional[btrfsvol.BlockGroupFlags]{
+ OK: true,
+ Val: blockgroup.Flags,
+ },
+ }
+ if err := fs.LV.AddMapping(mapping); err != nil {
+ dlog.Errorf(ctx, "error: %v", err)
+ continue
+ }
+ delete(blockgroups, bgLAddr)
+ }
+ dlog.Info(ctx, "done searching")
+
+ return nil
+}
+
+type lowestN[T containers.Ordered[T]] struct {
+ N int
+ Dat []T
+}
+
+func (l *lowestN[T]) Insert(v T) {
+ switch {
+ case len(l.Dat) < l.N:
+ l.Dat = append(l.Dat, v)
+ case v.Compare(l.Dat[0]) < 0:
+ l.Dat[0] = v
+ default:
+ return
+ }
+ sort.Slice(l.Dat, func(i, j int) bool {
+ return l.Dat[i].Compare(l.Dat[j]) < 0
+ })
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go
new file mode 100644
index 0000000..7c02d05
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go
@@ -0,0 +1,249 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+ "sort"
+ "strings"
+
+ "github.com/datawire/dlib/dlog"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/slices"
+)
+
+func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) SumRunWithGaps[btrfsvol.LogicalAddr] {
+ var records []btrfsinspect.SysExtentCSum
+ for _, devResults := range scanResults {
+ records = append(records, devResults.FoundExtentCSums...)
+ }
+ // Sort lower-generation items earlier; then sort by addr.
+ sort.Slice(records, func(i, j int) bool {
+ a, b := records[i], records[j]
+ switch {
+ case a.Generation < b.Generation:
+ return true
+ case a.Generation > b.Generation:
+ return false
+ default:
+ return a.Sums.Addr < b.Sums.Addr
+ }
+ })
+ if len(records) == 0 {
+ return SumRunWithGaps[btrfsvol.LogicalAddr]{}
+ }
+ sumSize := records[0].Sums.ChecksumSize
+
+ // Now build them in to a coherent address space.
+ //
+ // We can't just reverse-sort by generation to avoid mutations, because given
+ //
+ // gen1 AAAAAAA
+ // gen2 BBBBBBBB
+ // gen3 CCCCCCC
+ //
+ // "AAAAAAA" shouldn't be present, and if we just discard "BBBBBBBB"
+ // because it conflicts with "CCCCCCC", then we would erroneously
+ // include "AAAAAAA".
+ addrspace := new(containers.RBTree[btrfsinspect.SysExtentCSum])
+ for _, newRecord := range records {
+ for {
+ conflict := addrspace.Search(func(oldRecord btrfsinspect.SysExtentCSum) int {
+ switch {
+ case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) <= oldRecord.Sums.Addr:
+ // 'newRecord' is wholly to the left of 'oldRecord'.
+ return -1
+ case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) <= newRecord.Sums.Addr:
+ // 'oldRecord' is wholly to the left of 'newRecord'.
+ return 1
+ default:
+ // There is some overlap.
+ return 0
+ }
+ })
+ if conflict == nil {
+ // We can insert it
+ addrspace.Insert(newRecord)
+ break
+ }
+ oldRecord := conflict.Value
+ if oldRecord == newRecord {
+ // Duplicates are to be expected.
+ break
+ }
+ if oldRecord.Generation < newRecord.Generation {
+ // Newer generation wins.
+ addrspace.Delete(conflict)
+ // loop around to check for more conflicts
+ continue
+ }
+ if oldRecord.Generation > newRecord.Generation {
+ // We sorted them! This shouldn't happen.
+ panic("should not happen")
+ }
+ // Since sums are stored multiple times (RAID?), but may
+ // be split between items differently between copies, we
+ // should take the union (after verifying that they
+ // agree on the overlapping part).
+ overlapBeg := slices.Max(
+ oldRecord.Sums.Addr,
+ newRecord.Sums.Addr)
+ overlapEnd := slices.Min(
+ oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()),
+ newRecord.Sums.Addr.Add(newRecord.Sums.Size()))
+
+ oldOverlapBeg := int(overlapBeg.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize
+ oldOverlapEnd := int(overlapEnd.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize
+ oldOverlap := oldRecord.Sums.Sums[oldOverlapBeg:oldOverlapEnd]
+
+ newOverlapBeg := int(overlapBeg.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize
+ newOverlapEnd := int(overlapEnd.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize
+ newOverlap := newRecord.Sums.Sums[newOverlapBeg:newOverlapEnd]
+
+ if oldOverlap != newOverlap {
+ dlog.Errorf(ctx, "mismatch: {gen:%v, addr:%v, size:%v} conflicts with {gen:%v, addr:%v, size:%v}",
+ oldRecord.Generation, oldRecord.Sums.Addr, oldRecord.Sums.Size(),
+ newRecord.Generation, newRecord.Sums.Addr, newRecord.Sums.Size())
+ break
+ }
+ // OK, we match, so take the union.
+ var prefix, suffix btrfssum.ShortSum
+ switch {
+ case oldRecord.Sums.Addr < overlapBeg:
+ prefix = oldRecord.Sums.Sums[:oldOverlapBeg]
+ case newRecord.Sums.Addr < overlapBeg:
+ prefix = newRecord.Sums.Sums[:newOverlapBeg]
+ }
+ switch {
+ case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) > overlapEnd:
+ suffix = oldRecord.Sums.Sums[oldOverlapEnd:]
+ case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) > overlapEnd:
+ suffix = newRecord.Sums.Sums[newOverlapEnd:]
+ }
+ unionRecord := btrfsinspect.SysExtentCSum{
+ Generation: oldRecord.Generation,
+ Sums: btrfsitem.ExtentCSum{
+ SumRun: btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: oldRecord.Sums.ChecksumSize,
+ Addr: slices.Min(oldRecord.Sums.Addr, newRecord.Sums.Addr),
+ Sums: prefix + oldOverlap + suffix,
+ },
+ },
+ }
+ addrspace.Delete(conflict)
+ newRecord = unionRecord
+ // loop around to check for more conflicts
+ }
+ }
+
+ // Now flatten that RBTree in to a SumRunWithGaps.
+ var flattened SumRunWithGaps[btrfsvol.LogicalAddr]
+ var curAddr btrfsvol.LogicalAddr
+ var curSums strings.Builder
+ addrspace.Range(func(node *containers.RBNode[btrfsinspect.SysExtentCSum]) bool {
+ curEnd := curAddr + (btrfsvol.LogicalAddr(curSums.Len()/sumSize) * btrfssum.BlockSize)
+ if node.Value.Sums.Addr != curEnd {
+ if curSums.Len() > 0 {
+ flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: sumSize,
+ Addr: curAddr,
+ Sums: btrfssum.ShortSum(curSums.String()),
+ })
+ }
+ curAddr = node.Value.Sums.Addr
+ curSums.Reset()
+ }
+ curSums.WriteString(string(node.Value.Sums.Sums))
+ return true
+ })
+ if curSums.Len() > 0 {
+ flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: sumSize,
+ Addr: curAddr,
+ Sums: btrfssum.ShortSum(curSums.String()),
+ })
+ }
+ flattened.Addr = flattened.Runs[0].Addr
+ last := flattened.Runs[len(flattened.Runs)-1]
+ end := last.Addr.Add(last.Size())
+ flattened.Size = end.Sub(flattened.Addr)
+
+ return flattened
+}
+
+func ListUnmappedLogicalRegions(fs *btrfs.FS, logicalSums SumRunWithGaps[btrfsvol.LogicalAddr]) []btrfssum.SumRun[btrfsvol.LogicalAddr] {
+ // There are a lot of ways this algorithm could be made
+ // faster.
+ var ret []btrfssum.SumRun[btrfsvol.LogicalAddr]
+ var cur struct {
+ Addr btrfsvol.LogicalAddr
+ Size btrfsvol.AddrDelta
+ }
+ for _, run := range logicalSums.Runs {
+ for addr := run.Addr; addr < run.Addr.Add(run.Size()); addr += btrfssum.BlockSize {
+ if _, maxlen := fs.LV.Resolve(addr); maxlen < btrfssum.BlockSize {
+ if cur.Size == 0 {
+ cur.Addr = addr
+ cur.Size = 0
+ }
+ cur.Size += btrfssum.BlockSize
+ } else if cur.Size > 0 {
+ begIdx := int(cur.Addr.Sub(run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
+ lenIdx := (int(cur.Size) / btrfssum.BlockSize) * run.ChecksumSize
+ endIdx := begIdx + lenIdx
+ ret = append(ret, btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: run.ChecksumSize,
+ Addr: cur.Addr,
+ Sums: run.Sums[begIdx:endIdx],
+ })
+ cur.Size = 0
+ }
+ }
+ if cur.Size > 0 {
+ begIdx := int(cur.Addr.Sub(run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
+ lenIdx := (int(cur.Size) / btrfssum.BlockSize) * run.ChecksumSize
+ endIdx := begIdx + lenIdx
+ ret = append(ret, btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: run.ChecksumSize,
+ Addr: cur.Addr,
+ Sums: run.Sums[begIdx:endIdx],
+ })
+ cur.Size = 0
+ }
+ }
+ return ret
+}
+
+func SumsForLogicalRegion(sums SumRunWithGaps[btrfsvol.LogicalAddr], beg btrfsvol.LogicalAddr, size btrfsvol.AddrDelta) SumRunWithGaps[btrfsvol.LogicalAddr] {
+ runs := SumRunWithGaps[btrfsvol.LogicalAddr]{
+ Addr: beg,
+ Size: size,
+ }
+ for laddr := beg; laddr < beg.Add(size); {
+ run, next, ok := sums.RunForAddr(laddr)
+ if !ok {
+ laddr = next
+ continue
+ }
+ off := int((laddr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
+ deltaAddr := slices.Min[btrfsvol.AddrDelta](
+ size-laddr.Sub(beg),
+ btrfsvol.AddrDelta((len(run.Sums)-off)/run.ChecksumSize)*btrfssum.BlockSize)
+ deltaOff := int(deltaAddr/btrfssum.BlockSize) * run.ChecksumSize
+ runs.Runs = append(runs.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{
+ ChecksumSize: run.ChecksumSize,
+ Addr: laddr,
+ Sums: run.Sums[off : off+deltaOff],
+ })
+ laddr = laddr.Add(deltaAddr)
+ }
+ return runs
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go
new file mode 100644
index 0000000..da22fbf
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go
@@ -0,0 +1,89 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+ "sort"
+
+ "golang.org/x/exp/constraints"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
+ "git.lukeshu.com/btrfs-progs-ng/lib/maps"
+)
+
+func ExtractPhysicalSums(scanResults btrfsinspect.ScanDevicesResult) map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr] {
+ ret := make(map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], len(scanResults))
+ for devID, devResults := range scanResults {
+ ret[devID] = devResults.Checksums
+ }
+ return ret
+}
+
+type PhysicalRegion struct {
+ Beg, End btrfsvol.PhysicalAddr
+}
+
+func ListUnmappedPhysicalRegions(fs *btrfs.FS) map[btrfsvol.DeviceID][]PhysicalRegion {
+ regions := make(map[btrfsvol.DeviceID][]PhysicalRegion)
+ pos := make(map[btrfsvol.DeviceID]btrfsvol.PhysicalAddr)
+ mappings := fs.LV.Mappings()
+ sort.Slice(mappings, func(i, j int) bool {
+ return mappings[i].PAddr.Compare(mappings[j].PAddr) < 0
+ })
+ for _, mapping := range mappings {
+ if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr {
+ regions[mapping.PAddr.Dev] = append(regions[mapping.PAddr.Dev], PhysicalRegion{
+ Beg: pos[mapping.PAddr.Dev],
+ End: mapping.PAddr.Addr,
+ })
+ }
+ if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr.Add(mapping.Size) {
+ pos[mapping.PAddr.Dev] = mapping.PAddr.Addr.Add(mapping.Size)
+ }
+ }
+ for devID, dev := range fs.LV.PhysicalVolumes() {
+ devSize := dev.Size()
+ if pos[devID] < devSize {
+ regions[devID] = append(regions[devID], PhysicalRegion{
+ Beg: pos[devID],
+ End: devSize,
+ })
+ }
+ }
+ return regions
+}
+
+func roundUp[T constraints.Integer](x, multiple T) T {
+ return ((x + multiple - 1) / multiple) * multiple
+}
+
+func WalkUnmappedPhysicalRegions(ctx context.Context,
+ physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr],
+ gaps map[btrfsvol.DeviceID][]PhysicalRegion,
+ fn func(btrfsvol.DeviceID, btrfssum.SumRun[btrfsvol.PhysicalAddr]) error,
+) error {
+ for _, devID := range maps.SortedKeys(gaps) {
+ for _, gap := range gaps[devID] {
+ if err := ctx.Err(); err != nil {
+ return err
+ }
+ begAddr := roundUp(gap.Beg, btrfssum.BlockSize)
+ begOff := int(begAddr/btrfssum.BlockSize) * physicalSums[devID].ChecksumSize
+ endOff := int(gap.End/btrfssum.BlockSize) * physicalSums[devID].ChecksumSize
+ if err := fn(devID, btrfssum.SumRun[btrfsvol.PhysicalAddr]{
+ ChecksumSize: physicalSums[devID].ChecksumSize,
+ Addr: begAddr,
+ Sums: physicalSums[devID].Sums[begOff:endOff],
+ }); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/scan.go b/cmd/btrfs-rec/inspect/rebuildmappings/scan.go
new file mode 100644
index 0000000..d54be71
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/scan.go
@@ -0,0 +1,260 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package btrfsinspect
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/datawire/dlib/dgroup"
+ "github.com/datawire/dlib/dlog"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/binstruct"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/containers"
+ "git.lukeshu.com/btrfs-progs-ng/lib/textui"
+)
+
+type ScanDevicesResult map[btrfsvol.DeviceID]ScanOneDeviceResult
+
+func ScanDevices(ctx context.Context, fs *btrfs.FS) (ScanDevicesResult, error) {
+ grp := dgroup.NewGroup(ctx, dgroup.GroupConfig{})
+ var mu sync.Mutex
+ result := make(map[btrfsvol.DeviceID]ScanOneDeviceResult)
+ for id, dev := range fs.LV.PhysicalVolumes() {
+ id := id
+ dev := dev
+ grp.Go(fmt.Sprintf("dev-%d", id), func(ctx context.Context) error {
+ sb, err := dev.Superblock()
+ if err != nil {
+ return err
+ }
+ devResult, err := ScanOneDevice(ctx, dev, *sb)
+ if err != nil {
+ return err
+ }
+ mu.Lock()
+ result[id] = devResult
+ mu.Unlock()
+ return nil
+ })
+ }
+ if err := grp.Wait(); err != nil {
+ return nil, err
+ }
+ return result, nil
+}
+
+type ScanOneDeviceResult struct {
+ Checksums btrfssum.SumRun[btrfsvol.PhysicalAddr]
+ FoundNodes map[btrfsvol.LogicalAddr][]btrfsvol.PhysicalAddr
+ FoundChunks []btrfstree.SysChunk
+ FoundBlockGroups []SysBlockGroup
+ FoundDevExtents []SysDevExtent
+ FoundExtentCSums []SysExtentCSum
+}
+
+type SysBlockGroup struct {
+ Key btrfsprim.Key
+ BG btrfsitem.BlockGroup
+}
+
+type SysDevExtent struct {
+ Key btrfsprim.Key
+ DevExt btrfsitem.DevExtent
+}
+
+type SysExtentCSum struct {
+ Generation btrfsprim.Generation
+ Sums btrfsitem.ExtentCSum
+}
+
+// Compare implements containers.Ordered.
+func (a SysExtentCSum) Compare(b SysExtentCSum) int {
+ return containers.NativeCompare(a.Sums.Addr, b.Sums.Addr)
+}
+
+type scanStats struct {
+ textui.Portion[btrfsvol.PhysicalAddr]
+
+ NumFoundNodes int
+ NumFoundChunks int
+ NumFoundBlockGroups int
+ NumFoundDevExtents int
+ NumFoundExtentCSums int
+}
+
+func (s scanStats) String() string {
+ return textui.Sprintf("scanned %v (found: %v nodes, %v chunks, %v block groups, %v dev extents, %v sum items)",
+ s.Portion,
+ s.NumFoundNodes,
+ s.NumFoundChunks,
+ s.NumFoundBlockGroups,
+ s.NumFoundDevExtents,
+ s.NumFoundExtentCSums)
+}
+
+var sbSize = btrfsvol.PhysicalAddr(binstruct.StaticSize(btrfstree.Superblock{}))
+
+// ScanOneDevice mostly mimics btrfs-progs
+// cmds/rescue-chunk-recover.c:scan_one_device().
+func ScanOneDevice(ctx context.Context, dev *btrfs.Device, sb btrfstree.Superblock) (ScanOneDeviceResult, error) {
+ ctx = dlog.WithField(ctx, "btrfsinspect.scandevices.dev", dev.Name())
+
+ result := ScanOneDeviceResult{
+ FoundNodes: make(map[btrfsvol.LogicalAddr][]btrfsvol.PhysicalAddr),
+ }
+
+ devSize := dev.Size()
+ if sb.NodeSize < sb.SectorSize {
+ return result, fmt.Errorf("node_size(%v) < sector_size(%v)",
+ sb.NodeSize, sb.SectorSize)
+ }
+ if sb.SectorSize != btrfssum.BlockSize {
+ // TODO: probably handle this?
+ return result, fmt.Errorf("sector_size(%v) != btrfssum.BlockSize",
+ sb.SectorSize)
+ }
+ alg := sb.ChecksumType
+ csumSize := alg.Size()
+ numSums := int(devSize / btrfssum.BlockSize)
+ var sums strings.Builder
+ sums.Grow(numSums * csumSize)
+
+ progressWriter := textui.NewProgress[scanStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second))
+ progress := func(pos btrfsvol.PhysicalAddr) {
+ progressWriter.Set(scanStats{
+ Portion: textui.Portion[btrfsvol.PhysicalAddr]{
+ N: pos,
+ D: devSize,
+ },
+ NumFoundNodes: len(result.FoundNodes),
+ NumFoundChunks: len(result.FoundChunks),
+ NumFoundBlockGroups: len(result.FoundBlockGroups),
+ NumFoundDevExtents: len(result.FoundDevExtents),
+ NumFoundExtentCSums: len(result.FoundExtentCSums),
+ })
+ }
+
+ var minNextNode btrfsvol.PhysicalAddr
+ for i := 0; i < numSums; i++ {
+ if ctx.Err() != nil {
+ return result, ctx.Err()
+ }
+ pos := btrfsvol.PhysicalAddr(i * btrfssum.BlockSize)
+ progress(pos)
+
+ sum, err := btrfs.ChecksumPhysical(dev, alg, pos)
+ if err != nil {
+ return result, err
+ }
+ sums.Write(sum[:csumSize])
+
+ checkForNode := pos >= minNextNode && pos+btrfsvol.PhysicalAddr(sb.NodeSize) <= devSize
+ if checkForNode {
+ for _, sbAddr := range btrfs.SuperblockAddrs {
+ if sbAddr <= pos && pos < sbAddr+sbSize {
+ checkForNode = false
+ break
+ }
+ }
+ }
+
+ if checkForNode {
+ nodeRef, err := btrfstree.ReadNode[btrfsvol.PhysicalAddr](dev, sb, pos, btrfstree.NodeExpectations{})
+ if err != nil {
+ if !errors.Is(err, btrfstree.ErrNotANode) {
+ dlog.Errorf(ctx, "error: %v", err)
+ }
+ } else {
+ result.FoundNodes[nodeRef.Data.Head.Addr] = append(result.FoundNodes[nodeRef.Data.Head.Addr], nodeRef.Addr)
+ for i, item := range nodeRef.Data.BodyLeaf {
+ switch item.Key.ItemType {
+ case btrfsitem.CHUNK_ITEM_KEY:
+ switch itemBody := item.Body.(type) {
+ case *btrfsitem.Chunk:
+ dlog.Tracef(ctx, "node@%v: item %v: found chunk",
+ nodeRef.Addr, i)
+ result.FoundChunks = append(result.FoundChunks, btrfstree.SysChunk{
+ Key: item.Key,
+ Chunk: *itemBody,
+ })
+ case *btrfsitem.Error:
+ dlog.Errorf(ctx, "node@%v: item %v: error: malformed CHUNK_ITEM: %v",
+ nodeRef.Addr, i, itemBody.Err)
+ default:
+ panic(fmt.Errorf("should not happen: CHUNK_ITEM has unexpected item type: %T", itemBody))
+ }
+ case btrfsitem.BLOCK_GROUP_ITEM_KEY:
+ switch itemBody := item.Body.(type) {
+ case *btrfsitem.BlockGroup:
+ dlog.Tracef(ctx, "node@%v: item %v: found block group",
+ nodeRef.Addr, i)
+ result.FoundBlockGroups = append(result.FoundBlockGroups, SysBlockGroup{
+ Key: item.Key,
+ BG: *itemBody,
+ })
+ case *btrfsitem.Error:
+ dlog.Errorf(ctx, "node@%v: item %v: error: malformed BLOCK_GROUP_ITEM: %v",
+ nodeRef.Addr, i, itemBody.Err)
+ default:
+ panic(fmt.Errorf("should not happen: BLOCK_GROUP_ITEM has unexpected item type: %T", itemBody))
+ }
+ case btrfsitem.DEV_EXTENT_KEY:
+ switch itemBody := item.Body.(type) {
+ case *btrfsitem.DevExtent:
+ dlog.Tracef(ctx, "node@%v: item %v: found dev extent",
+ nodeRef.Addr, i)
+ result.FoundDevExtents = append(result.FoundDevExtents, SysDevExtent{
+ Key: item.Key,
+ DevExt: *itemBody,
+ })
+ case *btrfsitem.Error:
+ dlog.Errorf(ctx, "node@%v: item %v: error: malformed DEV_EXTENT: %v",
+ nodeRef.Addr, i, itemBody.Err)
+ default:
+ panic(fmt.Errorf("should not happen: DEV_EXTENT has unexpected item type: %T", itemBody))
+ }
+ case btrfsitem.EXTENT_CSUM_KEY:
+ switch itemBody := item.Body.(type) {
+ case *btrfsitem.ExtentCSum:
+ dlog.Tracef(ctx, "node@%v: item %v: found csums",
+ nodeRef.Addr, i)
+ result.FoundExtentCSums = append(result.FoundExtentCSums, SysExtentCSum{
+ Generation: nodeRef.Data.Head.Generation,
+ Sums: *itemBody,
+ })
+ case *btrfsitem.Error:
+ dlog.Errorf(ctx, "node@%v: item %v: error: malformed is EXTENT_CSUM: %v",
+ nodeRef.Addr, i, itemBody.Err)
+ default:
+ panic(fmt.Errorf("should not happen: EXTENT_CSUM has unexpected item type: %T", itemBody))
+ }
+ }
+ }
+ minNextNode = pos + btrfsvol.PhysicalAddr(sb.NodeSize)
+ }
+ btrfstree.FreeNodeRef(nodeRef)
+ }
+ }
+ progress(devSize)
+ progressWriter.Done()
+
+ result.Checksums = btrfssum.SumRun[btrfsvol.PhysicalAddr]{
+ ChecksumSize: csumSize,
+ Sums: btrfssum.ShortSum(sums.String()),
+ }
+
+ return result, nil
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go b/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go
new file mode 100644
index 0000000..f79e2be
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go
@@ -0,0 +1,167 @@
+// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package rebuildmappings
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "math"
+
+ "git.lukeshu.com/go/lowmemjson"
+
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+ "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+ "git.lukeshu.com/btrfs-progs-ng/lib/slices"
+)
+
+type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct {
+ // Store the start address and size, in order to facilitate
+ // leading and trailing gaps.
+ Addr Addr
+ Size btrfsvol.AddrDelta
+
+ Runs []btrfssum.SumRun[Addr]
+}
+
+var (
+ _ lowmemjson.Encodable = SumRunWithGaps[btrfsvol.LogicalAddr]{}
+ _ lowmemjson.Decodable = (*SumRunWithGaps[btrfsvol.LogicalAddr])(nil)
+)
+
+// PatLen implements kmpPattern[int, ShortSum].
+func (sg SumRunWithGaps[Addr]) PatLen() int {
+ return int(sg.Size / btrfssum.BlockSize)
+}
+
+func (sg SumRunWithGaps[Addr]) PctFull() float64 {
+ total := sg.PatLen()
+ var full int
+ for _, run := range sg.Runs {
+ full += run.SeqLen()
+ }
+ return float64(full) / float64(total)
+}
+
+func (sg SumRunWithGaps[Addr]) RunForAddr(addr Addr) (btrfssum.SumRun[Addr], Addr, bool) {
+ for _, run := range sg.Runs {
+ if run.Addr > addr {
+ return btrfssum.SumRun[Addr]{}, run.Addr, false
+ }
+ if run.Addr.Add(run.Size()) <= addr {
+ continue
+ }
+ return run, 0, true
+ }
+ return btrfssum.SumRun[Addr]{}, math.MaxInt64, false
+}
+
+func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, bool) {
+ if addr < sg.Addr || addr >= sg.Addr.Add(sg.Size) {
+ return "", false
+ }
+ runIdx, ok := slices.Search(sg.Runs, func(run btrfssum.SumRun[Addr]) int {
+ switch {
+ case addr < run.Addr:
+ return -1
+ case addr >= run.Addr.Add(run.Size()):
+ return 1
+ default:
+ return 0
+ }
+ })
+ if !ok {
+ return "", false
+ }
+ run := sg.Runs[runIdx]
+ off := int((addr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
+ return run.Sums[off : off+run.ChecksumSize], true
+}
+
+func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, btrfssum.ShortSum) error) error {
+ for _, run := range sg.Runs {
+ if err := run.Walk(ctx, fn); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// PatGet implements kmpPattern[int, ShortSum].
+func (sg SumRunWithGaps[Addr]) PatGet(sumIdx int) (btrfssum.ShortSum, bool) {
+ addr := sg.Addr.Add(btrfsvol.AddrDelta(sumIdx) * btrfssum.BlockSize)
+ return sg.SumForAddr(addr)
+}
+
+func (sg SumRunWithGaps[Addr]) EncodeJSON(w io.Writer) error {
+ if _, err := fmt.Fprintf(w, `{"Addr":%d,"Size":%d,"Runs":[`, sg.Addr, sg.Size); err != nil {
+ return err
+ }
+ cur := sg.Addr
+ for i, run := range sg.Runs {
+ if i > 0 {
+ if _, err := w.Write([]byte{','}); err != nil {
+ return err
+ }
+ }
+ switch {
+ case run.Addr < cur:
+ return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, run.Addr, cur)
+ case run.Addr > cur:
+ if _, err := fmt.Fprintf(w, `{"Gap":%d},`, run.Addr.Sub(cur)); err != nil {
+ return err
+ }
+ fallthrough
+ default:
+ if err := lowmemjson.NewEncoder(w).Encode(run); err != nil {
+ return err
+ }
+ cur = run.Addr.Add(run.Size())
+ }
+ }
+ end := sg.Addr.Add(sg.Size)
+ switch {
+ case end < cur:
+ return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, end, cur)
+ case end > cur:
+ if _, err := fmt.Fprintf(w, `,{"Gap":%d}`, end.Sub(cur)); err != nil {
+ return err
+ }
+ }
+ if _, err := w.Write([]byte("]}")); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (sg *SumRunWithGaps[Addr]) DecodeJSON(r io.RuneScanner) error {
+ *sg = SumRunWithGaps[Addr]{}
+ var name string
+ return lowmemjson.DecodeObject(r,
+ func(r io.RuneScanner) error {
+ return lowmemjson.NewDecoder(r).Decode(&name)
+ },
+ func(r io.RuneScanner) error {
+ switch name {
+ case "Addr":
+ return lowmemjson.NewDecoder(r).Decode(&sg.Addr)
+ case "Size":
+ return lowmemjson.NewDecoder(r).Decode(&sg.Size)
+ case "Runs":
+ return lowmemjson.DecodeArray(r, func(r io.RuneScanner) error {
+ var run btrfssum.SumRun[Addr]
+ if err := lowmemjson.NewDecoder(r).Decode(&run); err != nil {
+ return err
+ }
+ if run.ChecksumSize > 0 {
+ sg.Runs = append(sg.Runs, run)
+ }
+ return nil
+ })
+ default:
+ return fmt.Errorf("unknown key %q", name)
+ }
+ })
+}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
new file mode 100644
index 0000000..9d14adf
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("1")
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
new file mode 100644
index 0000000..269f061
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("0")
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
new file mode 100644
index 0000000..b8f1562
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("0")
+[]byte("")
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
new file mode 100644
index 0000000..be67506
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\xde\xdb!")
+[]byte("\xde\xdb")
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
new file mode 100644
index 0000000..c3bfa37
--- /dev/null
+++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d
@@ -0,0 +1,3 @@
+go test fuzz v1
+[]byte("\x10\x10\x15")
+[]byte("\x10\x15")