diff options
Diffstat (limited to 'cmd/btrfs-rec/inspect/rebuildmappings')
15 files changed, 1536 insertions, 0 deletions
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go b/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go new file mode 100644 index 0000000..20772ba --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/kmp.go @@ -0,0 +1,113 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "errors" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +type kmpPattern[K ~int64 | ~int, V comparable] interface { + PatLen() K + // Get the value at 'pos' in the sequence. Positions start at + // 0 and increment naturally. It is invalid to call Get(pos) + // with a pos that is >= Len(). If there is a gap/wildcard at + // pos, ok is false. + PatGet(pos K) (v V, ok bool) +} + +func kmpSelfEq[K ~int64 | ~int, V comparable](s kmpPattern[K, V], aI K, bI K) bool { + aV, aOK := s.PatGet(aI) + bV, bOK := s.PatGet(bI) + if !aOK || !bOK { + return true + } + return aV == bV +} + +// buildKMPTable takes the string 'substr', and returns a table such +// that 'table[matchLen-1]' is the largest value 'val' for which 'val < matchLen' and +// 'substr[:val] == substr[matchLen-val:matchLen]'. +func buildKMPTable[K ~int64 | ~int, V comparable](substr kmpPattern[K, V]) []K { + substrLen := substr.PatLen() + + table := make([]K, substrLen) + for j := K(0); j < substrLen; j++ { + if j == 0 { + // First entry must always be 0 (in order to + // satisfy 'val < matchLen'). + continue + } + val := table[j-1] + // not a match; go back + for val > 0 && !kmpSelfEq(substr, j, val) { + val = table[val-1] + } + // is a match; go forward + if kmpSelfEq(substr, val, j) { + val++ + } + table[j] = val + } + return table +} + +func kmpEq[K ~int64 | ~int, V comparable](aV V, bS kmpPattern[K, V], bI K) bool { + bV, ok := bS.PatGet(bI) + if !ok { + return true + } + return aV == bV +} + +// indexAll returns the starting-position of all possibly-overlapping +// occurrences of 'substr' in the 'str' sequence. 
+// +// Will hop around in 'substr', but will only get the natural sequence +// [0...) in order from 'str'. +// +// Will panic if the length of 'substr' is 0. +// +// The 'substr' may include wildcard characters by returning +// ErrWildcard for a position. +// +// Uses the Knuth-Morris-Pratt algorithm. +func indexAll[K ~int64 | ~int, V comparable](str diskio.Sequence[K, V], substr kmpPattern[K, V]) []K { + table := buildKMPTable(substr) + substrLen := K(len(table)) + if substrLen == 0 { + panic(errors.New("rebuildmappings.IndexAll: empty substring")) + } + + var matches []K + var curMatchBeg K + var curMatchLen K + + strLen := str.SeqLen() + for pos := K(0); pos < strLen; pos++ { + chr := str.SeqGet(pos) + + // Consider 'chr' + for curMatchLen > 0 && !kmpEq(chr, substr, curMatchLen) { // shorten the match + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + if kmpEq(chr, substr, curMatchLen) { // lengthen the match + if curMatchLen == 0 { + curMatchBeg = pos + } + curMatchLen++ + if curMatchLen == substrLen { + matches = append(matches, curMatchBeg) + overlap := table[curMatchLen-1] + curMatchBeg += curMatchLen - overlap + curMatchLen = overlap + } + } + } + return matches +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go b/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go new file mode 100644 index 0000000..acec9b8 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/kmp_test.go @@ -0,0 +1,126 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "git.lukeshu.com/btrfs-progs-ng/lib/diskio" +) + +type bytePattern[K ~int64 | ~int] []byte + +var _ kmpPattern[int, byte] = bytePattern[int]{} + +// PatLen implements kmpPattern. 
+func (s bytePattern[K]) PatLen() K { + return K(len(s)) +} + +// PatGet implements kmpPattern. +func (s bytePattern[K]) PatGet(i K) (byte, bool) { + chr := s[int(i)] + if chr == '.' { + return 0, false + } + return chr, true +} + +func TestBuildKMPTable(t *testing.T) { + t.Parallel() + substr := bytePattern[int64]([]byte("ababaa")) + table := buildKMPTable[int64, byte](substr) + require.Equal(t, + []int64{0, 0, 1, 2, 3, 1}, + table) + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } +} + +func FuzzBuildKMPTable(f *testing.F) { + f.Add([]byte("ababaa")) + f.Fuzz(func(t *testing.T, substr []byte) { + table := buildKMPTable[int64, byte](bytePattern[int64](substr)) + require.Equal(t, len(substr), len(table), "length") + for j, val := range table { + matchLen := j + 1 + assert.Equalf(t, substr[:val], substr[matchLen-int(val):matchLen], + "for table[%d]=%d", j, val) + } + }) +} + +func NaiveIndexAll(str, substr []byte) []int64 { + var matches []int64 + for i := range str { + if bytes.HasPrefix(str[i:], substr) { + matches = append(matches, int64(i)) + } + } + return matches +} + +func FuzzIndexAll(f *testing.F) { + f.Fuzz(func(t *testing.T, str, substr []byte) { + if len(substr) == 0 { + t.Skip() + } + t.Logf("str =%q", str) + t.Logf("substr=%q", substr) + exp := NaiveIndexAll(str, substr) + act := indexAll[int64, byte]( + diskio.SliceSequence[int64, byte](str), + bytePattern[int64](substr)) + assert.Equal(t, exp, act) + }) +} + +func TestKMPWildcard(t *testing.T) { + t.Parallel() + type testcase struct { + InStr string + InSubstr string + ExpMatches []int64 + } + testcases := map[string]testcase{ + "trivial-bar": { + InStr: "foo_bar", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "trival-baz": { + InStr: "foo-baz", + InSubstr: "foo.ba.", + ExpMatches: []int64{0}, + }, + "suffix": { + InStr: "foobarbaz", + InSubstr: "...baz", + ExpMatches: []int64{3}, + }, + 
"overlap": { + InStr: "foobarbar", + InSubstr: "...bar", + ExpMatches: []int64{0, 3}, + }, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + matches := indexAll[int64, byte]( + diskio.StringSequence[int64](tc.InStr), + bytePattern[int64](tc.InSubstr)) + assert.Equal(t, tc.ExpMatches, matches) + }) + } +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process.go b/cmd/btrfs-rec/inspect/rebuildmappings/process.go new file mode 100644 index 0000000..cdf5e5a --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process.go @@ -0,0 +1,222 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "fmt" + + "github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +func getNodeSize(fs *btrfs.FS) (btrfsvol.AddrDelta, error) { + sb, err := fs.Superblock() + if err != nil { + return 0, err + } + return btrfsvol.AddrDelta(sb.NodeSize), nil +} + +func RebuildMappings(ctx context.Context, fs *btrfs.FS, scanResults btrfsinspect.ScanDevicesResult) error { + nodeSize, err := getNodeSize(fs) + if err != nil { + return err + } + + var numChunks, numDevExts, numBlockGroups, numNodes int + devIDs := maps.SortedKeys(scanResults) + devices := fs.LV.PhysicalVolumes() + for _, devID := range devIDs { + if _, ok := devices[devID]; !ok { + return fmt.Errorf("device ID %v mentioned in scan results is not part of the filesystem", devID) + } + devResults := scanResults[devID] + numChunks += len(devResults.FoundChunks) + numDevExts += len(devResults.FoundDevExtents) + numBlockGroups += len(devResults.FoundBlockGroups) + for _, paddrs := range 
devResults.FoundNodes { + numNodes += len(paddrs) + } + } + dlog.Infof(ctx, "plan: 1/6 process %d chunks", numChunks) + dlog.Infof(ctx, "plan: 2/6 process %d device extents", numDevExts) + dlog.Infof(ctx, "plan: 3/6 process %d nodes", numNodes) + dlog.Infof(ctx, "plan: 4/6 process %d block groups", numBlockGroups) + dlog.Infof(ctx, "plan: 5/6 search for block groups in checksum map (exact)") + dlog.Infof(ctx, "plan: 6/6 search for block groups in checksum map (fuzzy)") + + _ctx := ctx + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "1/6") + dlog.Infof(_ctx, "1/6: Processing %d chunks...", numChunks) + for _, devID := range devIDs { + devResults := scanResults[devID] + for _, chunk := range devResults.FoundChunks { + for _, mapping := range chunk.Chunk.Mappings(chunk.Key) { + if err := fs.LV.AddMapping(mapping); err != nil { + dlog.Errorf(ctx, "error: adding chunk: %v", err) + } + } + } + } + dlog.Info(_ctx, "... done processing chunks") + + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "2/6") + dlog.Infof(_ctx, "2/6: Processing %d device extents...", numDevExts) + for _, devID := range devIDs { + devResults := scanResults[devID] + for _, ext := range devResults.FoundDevExtents { + if err := fs.LV.AddMapping(ext.DevExt.Mapping(ext.Key)); err != nil { + dlog.Errorf(ctx, "error: adding devext: %v", err) + } + } + } + dlog.Info(_ctx, "... done processing device extents") + + // Do the nodes "last" to avoid bloating the mappings table + // too much. (Because nodes are numerous and small, while the + // others are few and large; so it is likely that many of the + // nodes will be subsumed by other things.) + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "3/6") + dlog.Infof(_ctx, "3/6: Processing %d nodes...", numNodes) + for _, devID := range devIDs { + devResults := scanResults[devID] + // Sort them so that progress numbers are predictable. 
+ for _, laddr := range maps.SortedKeys(devResults.FoundNodes) { + for _, paddr := range devResults.FoundNodes[laddr] { + if err := fs.LV.AddMapping(btrfsvol.Mapping{ + LAddr: laddr, + PAddr: btrfsvol.QualifiedPhysicalAddr{ + Dev: devID, + Addr: paddr, + }, + Size: nodeSize, + SizeLocked: false, + }); err != nil { + dlog.Errorf(ctx, "error: adding node ident: %v", err) + } + } + } + } + dlog.Info(_ctx, "... done processing nodes") + + // Use block groups to add missing flags (and as a hint to + // combine node entries). + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "4/6") + dlog.Infof(_ctx, "4/6: Processing %d block groups...", numBlockGroups) + // First dedup them, because they change for allocations and + // CoW means that they'll bounce around a lot, so you likely + // have oodles of duplicates? + bgs, err := DedupBlockGroups(scanResults) + if err != nil { + return err + } + dlog.Infof(ctx, "... de-duplicated to %d block groups", len(bgs)) + for _, bgLAddr := range maps.SortedKeys(bgs) { + bg := bgs[bgLAddr] + otherLAddr, otherPAddr := fs.LV.ResolveAny(bg.LAddr, bg.Size) + if otherLAddr < 0 || otherPAddr.Addr < 0 { + dlog.Errorf(ctx, "error: could not pair blockgroup laddr=%v (size=%v flags=%v) with a mapping", + bg.LAddr, bg.Size, bg.Flags) + continue + } + + offsetWithinChunk := otherLAddr.Sub(bg.LAddr) + mapping := btrfsvol.Mapping{ + LAddr: bg.LAddr, + PAddr: otherPAddr.Add(-offsetWithinChunk), + Size: bg.Size, + SizeLocked: true, + Flags: containers.Optional[btrfsvol.BlockGroupFlags]{ + OK: true, + Val: bg.Flags, + }, + } + if err := fs.LV.AddMapping(mapping); err != nil { + dlog.Errorf(ctx, "error: adding flags from blockgroup: %v", err) + continue + } + delete(bgs, bgLAddr) + } + dlog.Info(_ctx, "... done processing block groups") + + // More than once, I've been tempted to get rid of this exact-search step and just have the fuzzy-search step. + // After all, looking at the timestamps in the log, it's faster per blockgroup! 
For some background, the big-O + // for each (per blockgroup) looks like: + // + // - exact-search: O(bgSize+physicalBlocks) + // - fuzzy-search: O(bgSize*physicalBlocks) worst-case; O(bgSize*log(physicalBlocks)) expected + // + // The fuzzy-search is only fast because the exact-search is so good at getting `physicalBlocks` down. + // Empirically: if I remove the exact-search step, then the fuzzy-match step is more than an order of magnitude + // slower. + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "5/6") + dlog.Infof(_ctx, "5/6: Searching for %d block groups in checksum map (exact)...", len(bgs)) + physicalSums := ExtractPhysicalSums(scanResults) + logicalSums := ExtractLogicalSums(ctx, scanResults) + if err := matchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil { + return err + } + dlog.Info(ctx, "... done searching for exact block groups") + + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "6/6") + dlog.Infof(_ctx, "6/6: Searching for %d block groups in checksum map (fuzzy)...", len(bgs)) + if err := fuzzyMatchBlockGroupSums(ctx, fs, bgs, physicalSums, logicalSums); err != nil { + return err + } + dlog.Info(_ctx, "... done searching for fuzzy block groups") + + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.step", "report") + dlog.Info(_ctx, "report:") + + unmappedPhysicalRegions := ListUnmappedPhysicalRegions(fs) + var unmappedPhysical btrfsvol.AddrDelta + var numUnmappedPhysical int + for _, devRegions := range unmappedPhysicalRegions { + numUnmappedPhysical += len(devRegions) + for _, region := range devRegions { + unmappedPhysical += region.End.Sub(region.Beg) + } + } + dlog.Infof(ctx, "... 
%d of unmapped physical space (across %d regions)", textui.IEC(unmappedPhysical, "B"), numUnmappedPhysical) + + unmappedLogicalRegions := ListUnmappedLogicalRegions(fs, logicalSums) + var unmappedLogical btrfsvol.AddrDelta + for _, region := range unmappedLogicalRegions { + unmappedLogical += region.Size() + } + dlog.Infof(ctx, "... %d of unmapped summed logical space (across %d regions)", textui.IEC(unmappedLogical, "B"), len(unmappedLogicalRegions)) + + var unmappedBlockGroups btrfsvol.AddrDelta + for _, bg := range bgs { + unmappedBlockGroups += bg.Size + } + dlog.Infof(ctx, "... %d of unmapped block groups (across %d groups)", textui.IEC(unmappedBlockGroups, "B"), len(bgs)) + + dlog.Info(_ctx, "detailed report:") + for _, devID := range maps.SortedKeys(unmappedPhysicalRegions) { + for _, region := range unmappedPhysicalRegions[devID] { + dlog.Infof(ctx, "... unmapped physical region: dev=%v beg=%v end=%v (size=%v)", + devID, region.Beg, region.End, region.End.Sub(region.Beg)) + } + } + for _, region := range unmappedLogicalRegions { + dlog.Infof(ctx, "... umapped summed logical region: beg=%v end=%v (size=%v)", + region.Addr, region.Addr.Add(region.Size()), region.Size()) + } + for _, laddr := range maps.SortedKeys(bgs) { + bg := bgs[laddr] + dlog.Infof(ctx, "... 
umapped block group: beg=%v end=%v (size=%v) flags=%v", + bg.LAddr, bg.LAddr.Add(bg.Size), bg.Size, bg.Flags) + } + + return nil +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go new file mode 100644 index 0000000..0e2d5a0 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_blockgroups.go @@ -0,0 +1,58 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "fmt" + "sort" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" +) + +type BlockGroup struct { + LAddr btrfsvol.LogicalAddr + Size btrfsvol.AddrDelta + Flags btrfsvol.BlockGroupFlags +} + +func DedupBlockGroups(scanResults btrfsinspect.ScanDevicesResult) (map[btrfsvol.LogicalAddr]BlockGroup, error) { + // Dedup + bgsSet := make(containers.Set[BlockGroup]) + for _, devResults := range scanResults { + for _, bg := range devResults.FoundBlockGroups { + bgsSet.Insert(BlockGroup{ + LAddr: btrfsvol.LogicalAddr(bg.Key.ObjectID), + Size: btrfsvol.AddrDelta(bg.Key.Offset), + Flags: bg.BG.Flags, + }) + } + } + + // Sort + bgsOrdered := maps.Keys(bgsSet) + sort.Slice(bgsOrdered, func(i, j int) bool { + return bgsOrdered[i].LAddr < bgsOrdered[j].LAddr + }) + + // Sanity check + var pos btrfsvol.LogicalAddr + for _, bg := range bgsOrdered { + if bg.LAddr < pos || bg.Size < 0 { + return nil, fmt.Errorf("found block groups are inconsistent") + } + pos = bg.LAddr.Add(bg.Size) + } + + // Return. We return a map instead of a slice in order to + // facilitate easy deletes. 
+ bgsMap := make(map[btrfsvol.LogicalAddr]BlockGroup, len(bgsSet)) + for bg := range bgsSet { + bgsMap[bg.LAddr] = bg + } + return bgsMap, nil +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go new file mode 100644 index 0000000..a3e724e --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_exact.go @@ -0,0 +1,78 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + + "github.com/datawire/dlib/dlog" + "golang.org/x/text/number" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" +) + +func matchBlockGroupSums(ctx context.Context, + fs *btrfs.FS, + blockgroups map[btrfsvol.LogicalAddr]BlockGroup, + physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], + logicalSums SumRunWithGaps[btrfsvol.LogicalAddr], +) error { + regions := ListUnmappedPhysicalRegions(fs) + numBlockgroups := len(blockgroups) + for i, bgLAddr := range maps.SortedKeys(blockgroups) { + blockgroup := blockgroups[bgLAddr] + bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size) + if len(bgRun.Runs) == 0 { + dlog.Errorf(ctx, "(%v/%v) blockgroup[laddr=%v] can't be matched because it has 0 runs", + i+1, numBlockgroups, bgLAddr) + continue + } + + var matches []btrfsvol.QualifiedPhysicalAddr + if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error { + rawMatches := indexAll[int, btrfssum.ShortSum](region, bgRun) + for _, match := range rawMatches { + matches = append(matches, btrfsvol.QualifiedPhysicalAddr{ + Dev: devID, + Addr: region.Addr + 
(btrfsvol.PhysicalAddr(match) * btrfssum.BlockSize), + }) + } + return nil + }); err != nil { + return err + } + + lvl := dlog.LogLevelError + if len(matches) == 1 { + lvl = dlog.LogLevelInfo + } + dlog.Logf(ctx, lvl, "(%v/%v) blockgroup[laddr=%v] has %v matches based on %v coverage from %v runs", + i+1, numBlockgroups, bgLAddr, len(matches), number.Percent(bgRun.PctFull()), len(bgRun.Runs)) + if len(matches) != 1 { + continue + } + + mapping := btrfsvol.Mapping{ + LAddr: blockgroup.LAddr, + PAddr: matches[0], + Size: blockgroup.Size, + SizeLocked: true, + Flags: containers.Optional[btrfsvol.BlockGroupFlags]{ + OK: true, + Val: blockgroup.Flags, + }, + } + if err := fs.LV.AddMapping(mapping); err != nil { + dlog.Errorf(ctx, "error: %v", err) + continue + } + delete(blockgroups, bgLAddr) + } + return nil +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go new file mode 100644 index 0000000..4724c12 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_matchsums_fuzzy.go @@ -0,0 +1,159 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "sort" + + "github.com/datawire/dlib/dlog" + "golang.org/x/text/number" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/maps" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +var minFuzzyPct = textui.Tunable(0.5) + +type fuzzyRecord struct { + PAddr btrfsvol.QualifiedPhysicalAddr + N int +} + +func (a fuzzyRecord) Compare(b fuzzyRecord) int { + switch { + case a.N < b.N: + return -1 + case a.N > b.N: + return 1 + default: + return 0 + } +} + +func fuzzyMatchBlockGroupSums(ctx context.Context, + fs *btrfs.FS, + 
blockgroups map[btrfsvol.LogicalAddr]BlockGroup, + physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], + logicalSums SumRunWithGaps[btrfsvol.LogicalAddr], +) error { + _ctx := ctx + + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "indexing") + dlog.Info(ctx, "Indexing physical regions...") // O(m) + regions := ListUnmappedPhysicalRegions(fs) + physicalIndex := make(map[btrfssum.ShortSum][]btrfsvol.QualifiedPhysicalAddr) + if err := WalkUnmappedPhysicalRegions(ctx, physicalSums, regions, func(devID btrfsvol.DeviceID, region btrfssum.SumRun[btrfsvol.PhysicalAddr]) error { + return region.Walk(ctx, func(paddr btrfsvol.PhysicalAddr, sum btrfssum.ShortSum) error { + physicalIndex[sum] = append(physicalIndex[sum], btrfsvol.QualifiedPhysicalAddr{ + Dev: devID, + Addr: paddr, + }) + return nil + }) + }); err != nil { + return err + } + dlog.Info(ctx, "... done indexing") + + ctx = dlog.WithField(_ctx, "btrfsinspect.rebuild-mappings.substep", "searching") + dlog.Info(ctx, "Searching...") + numBlockgroups := len(blockgroups) + for i, bgLAddr := range maps.SortedKeys(blockgroups) { + blockgroup := blockgroups[bgLAddr] + bgRun := SumsForLogicalRegion(logicalSums, blockgroup.LAddr, blockgroup.Size) + + d := bgRun.PatLen() + matches := make(map[btrfsvol.QualifiedPhysicalAddr]int) + if err := bgRun.Walk(ctx, func(laddr btrfsvol.LogicalAddr, sum btrfssum.ShortSum) error { // O(n*… + off := laddr.Sub(bgRun.Addr) + for _, paddr := range physicalIndex[sum] { // …log(m)) expected but …m) worst + key := btrfsvol.QualifiedPhysicalAddr{ + Dev: paddr.Dev, + Addr: paddr.Addr.Add(-off), + } + matches[key]++ + } + return nil + }); err != nil { + return err + } + + best := lowestN[fuzzyRecord]{N: 2} + for paddr, n := range matches { // O(m) + best.Insert(fuzzyRecord{ + PAddr: paddr, + N: d - n, + }) + } + + var apply bool + var matchesStr string + switch len(best.Dat) { + case 0: // can happen if there are no sums in the run + matchesStr = "" + 
case 1: // not sure how this can happen, but whatev + pct := float64(d-best.Dat[0].N) / float64(d) + matchesStr = textui.Sprintf("%v", number.Percent(pct)) + apply = pct > minFuzzyPct + case 2: + pct := float64(d-best.Dat[0].N) / float64(d) + pct2 := float64(d-best.Dat[1].N) / float64(d) + matchesStr = textui.Sprintf("best=%v secondbest=%v", number.Percent(pct), number.Percent(pct2)) + apply = pct > minFuzzyPct && pct2 < minFuzzyPct + } + lvl := dlog.LogLevelError + if apply { + lvl = dlog.LogLevelInfo + } + dlog.Logf(ctx, lvl, "(%v/%v) blockgroup[laddr=%v] matches=[%s]; bestpossible=%v (based on %v runs)", + i+1, numBlockgroups, bgLAddr, matchesStr, number.Percent(bgRun.PctFull()), len(bgRun.Runs)) + if !apply { + continue + } + + mapping := btrfsvol.Mapping{ + LAddr: blockgroup.LAddr, + PAddr: best.Dat[0].PAddr, + Size: blockgroup.Size, + SizeLocked: true, + Flags: containers.Optional[btrfsvol.BlockGroupFlags]{ + OK: true, + Val: blockgroup.Flags, + }, + } + if err := fs.LV.AddMapping(mapping); err != nil { + dlog.Errorf(ctx, "error: %v", err) + continue + } + delete(blockgroups, bgLAddr) + } + dlog.Info(ctx, "done searching") + + return nil +} + +type lowestN[T containers.Ordered[T]] struct { + N int + Dat []T +} + +func (l *lowestN[T]) Insert(v T) { + switch { + case len(l.Dat) < l.N: + l.Dat = append(l.Dat, v) + case v.Compare(l.Dat[0]) < 0: + l.Dat[0] = v + default: + return + } + sort.Slice(l.Dat, func(i, j int) bool { + return l.Dat[i].Compare(l.Dat[j]) < 0 + }) +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go new file mode 100644 index 0000000..7c02d05 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_logical.go @@ -0,0 +1,249 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "sort" + "strings" + + 
"github.com/datawire/dlib/dlog" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" +) + +func ExtractLogicalSums(ctx context.Context, scanResults btrfsinspect.ScanDevicesResult) SumRunWithGaps[btrfsvol.LogicalAddr] { + var records []btrfsinspect.SysExtentCSum + for _, devResults := range scanResults { + records = append(records, devResults.FoundExtentCSums...) + } + // Sort lower-generation items earlier; then sort by addr. + sort.Slice(records, func(i, j int) bool { + a, b := records[i], records[j] + switch { + case a.Generation < b.Generation: + return true + case a.Generation > b.Generation: + return false + default: + return a.Sums.Addr < b.Sums.Addr + } + }) + if len(records) == 0 { + return SumRunWithGaps[btrfsvol.LogicalAddr]{} + } + sumSize := records[0].Sums.ChecksumSize + + // Now build them in to a coherent address space. + // + // We can't just reverse-sort by generation to avoid mutations, because given + // + // gen1 AAAAAAA + // gen2 BBBBBBBB + // gen3 CCCCCCC + // + // "AAAAAAA" shouldn't be present, and if we just discard "BBBBBBBB" + // because it conflicts with "CCCCCCC", then we would erroneously + // include "AAAAAAA". + addrspace := new(containers.RBTree[btrfsinspect.SysExtentCSum]) + for _, newRecord := range records { + for { + conflict := addrspace.Search(func(oldRecord btrfsinspect.SysExtentCSum) int { + switch { + case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) <= oldRecord.Sums.Addr: + // 'newRecord' is wholly to the left of 'oldRecord'. + return -1 + case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) <= newRecord.Sums.Addr: + // 'oldRecord' is wholly to the left of 'newRecord'. 
+ return 1 + default: + // There is some overlap. + return 0 + } + }) + if conflict == nil { + // We can insert it + addrspace.Insert(newRecord) + break + } + oldRecord := conflict.Value + if oldRecord == newRecord { + // Duplicates are to be expected. + break + } + if oldRecord.Generation < newRecord.Generation { + // Newer generation wins. + addrspace.Delete(conflict) + // loop around to check for more conflicts + continue + } + if oldRecord.Generation > newRecord.Generation { + // We sorted them! This shouldn't happen. + panic("should not happen") + } + // Since sums are stored multiple times (RAID?), but may + // be split between items differently between copies, we + // should take the union (after verifying that they + // agree on the overlapping part). + overlapBeg := slices.Max( + oldRecord.Sums.Addr, + newRecord.Sums.Addr) + overlapEnd := slices.Min( + oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()), + newRecord.Sums.Addr.Add(newRecord.Sums.Size())) + + oldOverlapBeg := int(overlapBeg.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + oldOverlapEnd := int(overlapEnd.Sub(oldRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + oldOverlap := oldRecord.Sums.Sums[oldOverlapBeg:oldOverlapEnd] + + newOverlapBeg := int(overlapBeg.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + newOverlapEnd := int(overlapEnd.Sub(newRecord.Sums.Addr)/btrfssum.BlockSize) * sumSize + newOverlap := newRecord.Sums.Sums[newOverlapBeg:newOverlapEnd] + + if oldOverlap != newOverlap { + dlog.Errorf(ctx, "mismatch: {gen:%v, addr:%v, size:%v} conflicts with {gen:%v, addr:%v, size:%v}", + oldRecord.Generation, oldRecord.Sums.Addr, oldRecord.Sums.Size(), + newRecord.Generation, newRecord.Sums.Addr, newRecord.Sums.Size()) + break + } + // OK, we match, so take the union. 
+ var prefix, suffix btrfssum.ShortSum + switch { + case oldRecord.Sums.Addr < overlapBeg: + prefix = oldRecord.Sums.Sums[:oldOverlapBeg] + case newRecord.Sums.Addr < overlapBeg: + prefix = newRecord.Sums.Sums[:newOverlapBeg] + } + switch { + case oldRecord.Sums.Addr.Add(oldRecord.Sums.Size()) > overlapEnd: + suffix = oldRecord.Sums.Sums[oldOverlapEnd:] + case newRecord.Sums.Addr.Add(newRecord.Sums.Size()) > overlapEnd: + suffix = newRecord.Sums.Sums[newOverlapEnd:] + } + unionRecord := btrfsinspect.SysExtentCSum{ + Generation: oldRecord.Generation, + Sums: btrfsitem.ExtentCSum{ + SumRun: btrfssum.SumRun[btrfsvol.LogicalAddr]{ + ChecksumSize: oldRecord.Sums.ChecksumSize, + Addr: slices.Min(oldRecord.Sums.Addr, newRecord.Sums.Addr), + Sums: prefix + oldOverlap + suffix, + }, + }, + } + addrspace.Delete(conflict) + newRecord = unionRecord + // loop around to check for more conflicts + } + } + + // Now flatten that RBTree in to a SumRunWithGaps. + var flattened SumRunWithGaps[btrfsvol.LogicalAddr] + var curAddr btrfsvol.LogicalAddr + var curSums strings.Builder + addrspace.Range(func(node *containers.RBNode[btrfsinspect.SysExtentCSum]) bool { + curEnd := curAddr + (btrfsvol.LogicalAddr(curSums.Len()/sumSize) * btrfssum.BlockSize) + if node.Value.Sums.Addr != curEnd { + if curSums.Len() > 0 { + flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{ + ChecksumSize: sumSize, + Addr: curAddr, + Sums: btrfssum.ShortSum(curSums.String()), + }) + } + curAddr = node.Value.Sums.Addr + curSums.Reset() + } + curSums.WriteString(string(node.Value.Sums.Sums)) + return true + }) + if curSums.Len() > 0 { + flattened.Runs = append(flattened.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{ + ChecksumSize: sumSize, + Addr: curAddr, + Sums: btrfssum.ShortSum(curSums.String()), + }) + } + flattened.Addr = flattened.Runs[0].Addr + last := flattened.Runs[len(flattened.Runs)-1] + end := last.Addr.Add(last.Size()) + flattened.Size = end.Sub(flattened.Addr) + + return 
flattened
}

// ListUnmappedLogicalRegions returns the contiguous sub-runs of
// logicalSums whose logical addresses do not currently resolve to any
// device (fs.LV.Resolve reports less than one checksum-block worth of
// valid mapping).  A returned run never crosses a gap in logicalSums.
func ListUnmappedLogicalRegions(fs *btrfs.FS, logicalSums SumRunWithGaps[btrfsvol.LogicalAddr]) []btrfssum.SumRun[btrfsvol.LogicalAddr] {
	// There are a lot of ways this algorithm could be made
	// faster.
	var ret []btrfssum.SumRun[btrfsvol.LogicalAddr]
	// cur accumulates the unmapped region currently being grown;
	// cur.Size == 0 means "no region in progress".
	var cur struct {
		Addr btrfsvol.LogicalAddr
		Size btrfsvol.AddrDelta
	}
	for _, run := range logicalSums.Runs {
		for addr := run.Addr; addr < run.Addr.Add(run.Size()); addr += btrfssum.BlockSize {
			if _, maxlen := fs.LV.Resolve(addr); maxlen < btrfssum.BlockSize {
				// Unmapped block: start or extend the current region.
				if cur.Size == 0 {
					cur.Addr = addr
					cur.Size = 0
				}
				cur.Size += btrfssum.BlockSize
			} else if cur.Size > 0 {
				// Mapped block terminates the current region: flush it.
				// Offsets into run.Sums are in bytes: one ChecksumSize
				// chunk per BlockSize of address space.
				begIdx := int(cur.Addr.Sub(run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
				lenIdx := (int(cur.Size) / btrfssum.BlockSize) * run.ChecksumSize
				endIdx := begIdx + lenIdx
				ret = append(ret, btrfssum.SumRun[btrfsvol.LogicalAddr]{
					ChecksumSize: run.ChecksumSize,
					Addr:         cur.Addr,
					Sums:         run.Sums[begIdx:endIdx],
				})
				cur.Size = 0
			}
		}
		// Flush a region that extends to the end of this run, so that
		// it does not leak across the gap into the next run.
		if cur.Size > 0 {
			begIdx := int(cur.Addr.Sub(run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
			lenIdx := (int(cur.Size) / btrfssum.BlockSize) * run.ChecksumSize
			endIdx := begIdx + lenIdx
			ret = append(ret, btrfssum.SumRun[btrfsvol.LogicalAddr]{
				ChecksumSize: run.ChecksumSize,
				Addr:         cur.Addr,
				Sums:         run.Sums[begIdx:endIdx],
			})
			cur.Size = 0
		}
	}
	return ret
}

// SumsForLogicalRegion returns the portion of 'sums' that covers the
// region [beg, beg+size); gaps in the input remain gaps in the
// output.
func SumsForLogicalRegion(sums SumRunWithGaps[btrfsvol.LogicalAddr], beg btrfsvol.LogicalAddr, size btrfsvol.AddrDelta) SumRunWithGaps[btrfsvol.LogicalAddr] {
	runs := SumRunWithGaps[btrfsvol.LogicalAddr]{
		Addr: beg,
		Size: size,
	}
	for laddr := beg; laddr < beg.Add(size); {
		run, next, ok := sums.RunForAddr(laddr)
		if !ok {
			// laddr falls in a gap; skip ahead to the next run.
			laddr = next
			continue
		}
		off := int((laddr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
		// Take no more than the remainder of the requested region,
		// and no more than the remainder of this run.
		deltaAddr := slices.Min[btrfsvol.AddrDelta](
			size-laddr.Sub(beg),
			btrfsvol.AddrDelta((len(run.Sums)-off)/run.ChecksumSize)*btrfssum.BlockSize)
		deltaOff := int(deltaAddr/btrfssum.BlockSize) * run.ChecksumSize
		runs.Runs = append(runs.Runs, btrfssum.SumRun[btrfsvol.LogicalAddr]{
			ChecksumSize: run.ChecksumSize,
			Addr:         laddr,
			Sums:         run.Sums[off : off+deltaOff],
		})
		laddr = laddr.Add(deltaAddr)
	}
	return runs
}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go new file mode 100644 index 0000000..da22fbf --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/process_sums_physical.go @@ -0,0 +1,89 @@
// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package rebuildmappings

import (
	"context"
	"sort"

	"golang.org/x/exp/constraints"

	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
	"git.lukeshu.com/btrfs-progs-ng/lib/maps"
)

// ExtractPhysicalSums pulls just the per-device checksum runs out of
// a ScanDevicesResult.
func ExtractPhysicalSums(scanResults btrfsinspect.ScanDevicesResult) map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr] {
	ret := make(map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr], len(scanResults))
	for devID, devResults := range scanResults {
		ret[devID] = devResults.Checksums
	}
	return ret
}

// PhysicalRegion is a half-open range [Beg,End) of physical addresses
// on a device.
type PhysicalRegion struct {
	Beg, End btrfsvol.PhysicalAddr
}

// ListUnmappedPhysicalRegions returns, for each device, the physical
// address regions that are not covered by any existing chunk mapping.
func ListUnmappedPhysicalRegions(fs *btrfs.FS) map[btrfsvol.DeviceID][]PhysicalRegion {
	regions := make(map[btrfsvol.DeviceID][]PhysicalRegion)
	// pos[dev] is the end of the furthest-right mapping seen so far
	// on dev.
	pos := make(map[btrfsvol.DeviceID]btrfsvol.PhysicalAddr)
	mappings := fs.LV.Mappings()
	// Sort so that each device's mappings are visited in address
	// order.
	sort.Slice(mappings, func(i, j int) bool {
		return mappings[i].PAddr.Compare(mappings[j].PAddr) < 0
	})
	for _, mapping := range mappings {
		if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr {
			// Gap between the previous mapping and this one.
			regions[mapping.PAddr.Dev] = append(regions[mapping.PAddr.Dev], PhysicalRegion{
				Beg: pos[mapping.PAddr.Dev],
				End: mapping.PAddr.Addr,
			})
		}
		if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr.Add(mapping.Size) {
			pos[mapping.PAddr.Dev] = mapping.PAddr.Addr.Add(mapping.Size)
		}
	}
	// Anything between the last mapping and the end of each device is
	// also unmapped.
	for devID, dev := range fs.LV.PhysicalVolumes() {
		devSize := dev.Size()
		if pos[devID] < devSize {
			regions[devID] = append(regions[devID], PhysicalRegion{
				Beg: pos[devID],
				End: devSize,
			})
		}
	}
	return regions
}

// roundUp rounds x up to the nearest multiple of 'multiple'.
// (Intended for non-negative x: truncating integer division would
// round the wrong way for negative values.)
func roundUp[T constraints.Integer](x, multiple T) T {
	return ((x + multiple - 1) / multiple) * multiple
}

// WalkUnmappedPhysicalRegions calls fn once per gap in 'gaps',
// passing the checksum run that covers the block-aligned portion of
// that gap.  Devices are visited in sorted-key order so the walk is
// deterministic, and ctx cancellation aborts the walk.
func WalkUnmappedPhysicalRegions(ctx context.Context,
	physicalSums map[btrfsvol.DeviceID]btrfssum.SumRun[btrfsvol.PhysicalAddr],
	gaps map[btrfsvol.DeviceID][]PhysicalRegion,
	fn func(btrfsvol.DeviceID, btrfssum.SumRun[btrfsvol.PhysicalAddr]) error,
) error {
	for _, devID := range maps.SortedKeys(gaps) {
		for _, gap := range gaps[devID] {
			if err := ctx.Err(); err != nil {
				return err
			}
			// Align the start of the gap up to a checksum-block
			// boundary; the end offset truncates down via integer
			// division.
			begAddr := roundUp(gap.Beg, btrfssum.BlockSize)
			begOff := int(begAddr/btrfssum.BlockSize) * physicalSums[devID].ChecksumSize
			endOff := int(gap.End/btrfssum.BlockSize) * physicalSums[devID].ChecksumSize
			if err := fn(devID, btrfssum.SumRun[btrfsvol.PhysicalAddr]{
				ChecksumSize: physicalSums[devID].ChecksumSize,
				Addr:         begAddr,
				Sums:         physicalSums[devID].Sums[begOff:endOff],
			}); err != nil {
				return err
			}
		}
	}
	return nil
}
diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/scan.go b/cmd/btrfs-rec/inspect/rebuildmappings/scan.go new file mode 100644 index 0000000..d54be71 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/scan.go @@ -0,0 +1,260 @@
// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package btrfsinspect

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/datawire/dlib/dgroup"
	"github.com/datawire/dlib/dlog"

	"git.lukeshu.com/btrfs-progs-ng/lib/binstruct"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/containers" + "git.lukeshu.com/btrfs-progs-ng/lib/textui" +) + +type ScanDevicesResult map[btrfsvol.DeviceID]ScanOneDeviceResult + +func ScanDevices(ctx context.Context, fs *btrfs.FS) (ScanDevicesResult, error) { + grp := dgroup.NewGroup(ctx, dgroup.GroupConfig{}) + var mu sync.Mutex + result := make(map[btrfsvol.DeviceID]ScanOneDeviceResult) + for id, dev := range fs.LV.PhysicalVolumes() { + id := id + dev := dev + grp.Go(fmt.Sprintf("dev-%d", id), func(ctx context.Context) error { + sb, err := dev.Superblock() + if err != nil { + return err + } + devResult, err := ScanOneDevice(ctx, dev, *sb) + if err != nil { + return err + } + mu.Lock() + result[id] = devResult + mu.Unlock() + return nil + }) + } + if err := grp.Wait(); err != nil { + return nil, err + } + return result, nil +} + +type ScanOneDeviceResult struct { + Checksums btrfssum.SumRun[btrfsvol.PhysicalAddr] + FoundNodes map[btrfsvol.LogicalAddr][]btrfsvol.PhysicalAddr + FoundChunks []btrfstree.SysChunk + FoundBlockGroups []SysBlockGroup + FoundDevExtents []SysDevExtent + FoundExtentCSums []SysExtentCSum +} + +type SysBlockGroup struct { + Key btrfsprim.Key + BG btrfsitem.BlockGroup +} + +type SysDevExtent struct { + Key btrfsprim.Key + DevExt btrfsitem.DevExtent +} + +type SysExtentCSum struct { + Generation btrfsprim.Generation + Sums btrfsitem.ExtentCSum +} + +// Compare implements containers.Ordered. 
+func (a SysExtentCSum) Compare(b SysExtentCSum) int { + return containers.NativeCompare(a.Sums.Addr, b.Sums.Addr) +} + +type scanStats struct { + textui.Portion[btrfsvol.PhysicalAddr] + + NumFoundNodes int + NumFoundChunks int + NumFoundBlockGroups int + NumFoundDevExtents int + NumFoundExtentCSums int +} + +func (s scanStats) String() string { + return textui.Sprintf("scanned %v (found: %v nodes, %v chunks, %v block groups, %v dev extents, %v sum items)", + s.Portion, + s.NumFoundNodes, + s.NumFoundChunks, + s.NumFoundBlockGroups, + s.NumFoundDevExtents, + s.NumFoundExtentCSums) +} + +var sbSize = btrfsvol.PhysicalAddr(binstruct.StaticSize(btrfstree.Superblock{})) + +// ScanOneDevice mostly mimics btrfs-progs +// cmds/rescue-chunk-recover.c:scan_one_device(). +func ScanOneDevice(ctx context.Context, dev *btrfs.Device, sb btrfstree.Superblock) (ScanOneDeviceResult, error) { + ctx = dlog.WithField(ctx, "btrfsinspect.scandevices.dev", dev.Name()) + + result := ScanOneDeviceResult{ + FoundNodes: make(map[btrfsvol.LogicalAddr][]btrfsvol.PhysicalAddr), + } + + devSize := dev.Size() + if sb.NodeSize < sb.SectorSize { + return result, fmt.Errorf("node_size(%v) < sector_size(%v)", + sb.NodeSize, sb.SectorSize) + } + if sb.SectorSize != btrfssum.BlockSize { + // TODO: probably handle this? 
+ return result, fmt.Errorf("sector_size(%v) != btrfssum.BlockSize", + sb.SectorSize) + } + alg := sb.ChecksumType + csumSize := alg.Size() + numSums := int(devSize / btrfssum.BlockSize) + var sums strings.Builder + sums.Grow(numSums * csumSize) + + progressWriter := textui.NewProgress[scanStats](ctx, dlog.LogLevelInfo, textui.Tunable(1*time.Second)) + progress := func(pos btrfsvol.PhysicalAddr) { + progressWriter.Set(scanStats{ + Portion: textui.Portion[btrfsvol.PhysicalAddr]{ + N: pos, + D: devSize, + }, + NumFoundNodes: len(result.FoundNodes), + NumFoundChunks: len(result.FoundChunks), + NumFoundBlockGroups: len(result.FoundBlockGroups), + NumFoundDevExtents: len(result.FoundDevExtents), + NumFoundExtentCSums: len(result.FoundExtentCSums), + }) + } + + var minNextNode btrfsvol.PhysicalAddr + for i := 0; i < numSums; i++ { + if ctx.Err() != nil { + return result, ctx.Err() + } + pos := btrfsvol.PhysicalAddr(i * btrfssum.BlockSize) + progress(pos) + + sum, err := btrfs.ChecksumPhysical(dev, alg, pos) + if err != nil { + return result, err + } + sums.Write(sum[:csumSize]) + + checkForNode := pos >= minNextNode && pos+btrfsvol.PhysicalAddr(sb.NodeSize) <= devSize + if checkForNode { + for _, sbAddr := range btrfs.SuperblockAddrs { + if sbAddr <= pos && pos < sbAddr+sbSize { + checkForNode = false + break + } + } + } + + if checkForNode { + nodeRef, err := btrfstree.ReadNode[btrfsvol.PhysicalAddr](dev, sb, pos, btrfstree.NodeExpectations{}) + if err != nil { + if !errors.Is(err, btrfstree.ErrNotANode) { + dlog.Errorf(ctx, "error: %v", err) + } + } else { + result.FoundNodes[nodeRef.Data.Head.Addr] = append(result.FoundNodes[nodeRef.Data.Head.Addr], nodeRef.Addr) + for i, item := range nodeRef.Data.BodyLeaf { + switch item.Key.ItemType { + case btrfsitem.CHUNK_ITEM_KEY: + switch itemBody := item.Body.(type) { + case *btrfsitem.Chunk: + dlog.Tracef(ctx, "node@%v: item %v: found chunk", + nodeRef.Addr, i) + result.FoundChunks = append(result.FoundChunks, 
btrfstree.SysChunk{ + Key: item.Key, + Chunk: *itemBody, + }) + case *btrfsitem.Error: + dlog.Errorf(ctx, "node@%v: item %v: error: malformed CHUNK_ITEM: %v", + nodeRef.Addr, i, itemBody.Err) + default: + panic(fmt.Errorf("should not happen: CHUNK_ITEM has unexpected item type: %T", itemBody)) + } + case btrfsitem.BLOCK_GROUP_ITEM_KEY: + switch itemBody := item.Body.(type) { + case *btrfsitem.BlockGroup: + dlog.Tracef(ctx, "node@%v: item %v: found block group", + nodeRef.Addr, i) + result.FoundBlockGroups = append(result.FoundBlockGroups, SysBlockGroup{ + Key: item.Key, + BG: *itemBody, + }) + case *btrfsitem.Error: + dlog.Errorf(ctx, "node@%v: item %v: error: malformed BLOCK_GROUP_ITEM: %v", + nodeRef.Addr, i, itemBody.Err) + default: + panic(fmt.Errorf("should not happen: BLOCK_GROUP_ITEM has unexpected item type: %T", itemBody)) + } + case btrfsitem.DEV_EXTENT_KEY: + switch itemBody := item.Body.(type) { + case *btrfsitem.DevExtent: + dlog.Tracef(ctx, "node@%v: item %v: found dev extent", + nodeRef.Addr, i) + result.FoundDevExtents = append(result.FoundDevExtents, SysDevExtent{ + Key: item.Key, + DevExt: *itemBody, + }) + case *btrfsitem.Error: + dlog.Errorf(ctx, "node@%v: item %v: error: malformed DEV_EXTENT: %v", + nodeRef.Addr, i, itemBody.Err) + default: + panic(fmt.Errorf("should not happen: DEV_EXTENT has unexpected item type: %T", itemBody)) + } + case btrfsitem.EXTENT_CSUM_KEY: + switch itemBody := item.Body.(type) { + case *btrfsitem.ExtentCSum: + dlog.Tracef(ctx, "node@%v: item %v: found csums", + nodeRef.Addr, i) + result.FoundExtentCSums = append(result.FoundExtentCSums, SysExtentCSum{ + Generation: nodeRef.Data.Head.Generation, + Sums: *itemBody, + }) + case *btrfsitem.Error: + dlog.Errorf(ctx, "node@%v: item %v: error: malformed is EXTENT_CSUM: %v", + nodeRef.Addr, i, itemBody.Err) + default: + panic(fmt.Errorf("should not happen: EXTENT_CSUM has unexpected item type: %T", itemBody)) + } + } + } + minNextNode = pos + 
btrfsvol.PhysicalAddr(sb.NodeSize) + } + btrfstree.FreeNodeRef(nodeRef) + } + } + progress(devSize) + progressWriter.Done() + + result.Checksums = btrfssum.SumRun[btrfsvol.PhysicalAddr]{ + ChecksumSize: csumSize, + Sums: btrfssum.ShortSum(sums.String()), + } + + return result, nil +} diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go b/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go new file mode 100644 index 0000000..f79e2be --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/sumrunwithgaps.go @@ -0,0 +1,167 @@ +// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package rebuildmappings + +import ( + "context" + "fmt" + "io" + "math" + + "git.lukeshu.com/go/lowmemjson" + + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum" + "git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol" + "git.lukeshu.com/btrfs-progs-ng/lib/slices" +) + +type SumRunWithGaps[Addr btrfsvol.IntAddr[Addr]] struct { + // Store the start address and size, in order to facilitate + // leading and trailing gaps. + Addr Addr + Size btrfsvol.AddrDelta + + Runs []btrfssum.SumRun[Addr] +} + +var ( + _ lowmemjson.Encodable = SumRunWithGaps[btrfsvol.LogicalAddr]{} + _ lowmemjson.Decodable = (*SumRunWithGaps[btrfsvol.LogicalAddr])(nil) +) + +// PatLen implements kmpPattern[int, ShortSum]. 
func (sg SumRunWithGaps[Addr]) PatLen() int {
	return int(sg.Size / btrfssum.BlockSize)
}

// PctFull returns the fraction (0.0-1.0) of the spanned region that
// is covered by runs rather than gaps.
func (sg SumRunWithGaps[Addr]) PctFull() float64 {
	total := sg.PatLen()
	var full int
	for _, run := range sg.Runs {
		full += run.SeqLen()
	}
	return float64(full) / float64(total)
}

// RunForAddr returns the run containing addr.  If addr falls in a
// gap, it returns ok=false along with the start address of the next
// run (or math.MaxInt64 if there is no later run).
func (sg SumRunWithGaps[Addr]) RunForAddr(addr Addr) (btrfssum.SumRun[Addr], Addr, bool) {
	for _, run := range sg.Runs {
		if run.Addr > addr {
			// Runs are visited in address order, so addr must be in
			// a gap before this run.
			return btrfssum.SumRun[Addr]{}, run.Addr, false
		}
		if run.Addr.Add(run.Size()) <= addr {
			continue
		}
		return run, 0, true
	}
	return btrfssum.SumRun[Addr]{}, math.MaxInt64, false
}

// SumForAddr returns the single checksum for the block at addr, or
// ok=false if addr is outside [sg.Addr, sg.Addr+sg.Size) or falls in
// a gap.
func (sg SumRunWithGaps[Addr]) SumForAddr(addr Addr) (btrfssum.ShortSum, bool) {
	if addr < sg.Addr || addr >= sg.Addr.Add(sg.Size) {
		return "", false
	}
	// Binary-search the (address-ordered) runs for the one
	// containing addr.
	runIdx, ok := slices.Search(sg.Runs, func(run btrfssum.SumRun[Addr]) int {
		switch {
		case addr < run.Addr:
			return -1
		case addr >= run.Addr.Add(run.Size()):
			return 1
		default:
			return 0
		}
	})
	if !ok {
		return "", false
	}
	run := sg.Runs[runIdx]
	off := int((addr-run.Addr)/btrfssum.BlockSize) * run.ChecksumSize
	return run.Sums[off : off+run.ChecksumSize], true
}

// Walk calls fn for every (address, checksum) pair in each run, in
// order; addresses inside gaps are not visited.
func (sg SumRunWithGaps[Addr]) Walk(ctx context.Context, fn func(Addr, btrfssum.ShortSum) error) error {
	for _, run := range sg.Runs {
		if err := run.Walk(ctx, fn); err != nil {
			return err
		}
	}
	return nil
}

// PatGet implements kmpPattern[int, ShortSum].  A position that falls
// in a gap returns ok=false, i.e. it acts as a wildcard for matching.
func (sg SumRunWithGaps[Addr]) PatGet(sumIdx int) (btrfssum.ShortSum, bool) {
	addr := sg.Addr.Add(btrfsvol.AddrDelta(sumIdx) * btrfssum.BlockSize)
	return sg.SumForAddr(addr)
}

// EncodeJSON implements lowmemjson.Encodable.  Gaps between runs are
// written as {"Gap":n} pseudo-entries in the "Runs" array, where n is
// the byte length of the gap.
func (sg SumRunWithGaps[Addr]) EncodeJSON(w io.Writer) error {
	if _, err := fmt.Fprintf(w, `{"Addr":%d,"Size":%d,"Runs":[`, sg.Addr, sg.Size); err != nil {
		return err
	}
	// cur tracks the address we have emitted coverage up to.
	cur := sg.Addr
	for i, run := range sg.Runs {
		if i > 0 {
			if _, err := w.Write([]byte{','}); err != nil {
				return err
			}
		}
		switch {
		case run.Addr < cur:
			return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, run.Addr, cur)
		case run.Addr > cur:
			// Emit a pseudo-entry for the gap, then fall through to
			// emit the run itself.
			if _, err := fmt.Fprintf(w, `{"Gap":%d},`, run.Addr.Sub(cur)); err != nil {
				return err
			}
			fallthrough
		default:
			if err := lowmemjson.NewEncoder(w).Encode(run); err != nil {
				return err
			}
			cur = run.Addr.Add(run.Size())
		}
	}
	// Emit a trailing gap, if any, so the entries always cover the
	// full [Addr, Addr+Size) span.
	end := sg.Addr.Add(sg.Size)
	switch {
	case end < cur:
		return fmt.Errorf("invalid %T: addr went backwards: %v < %v", sg, end, cur)
	case end > cur:
		if _, err := fmt.Fprintf(w, `,{"Gap":%d}`, end.Sub(cur)); err != nil {
			return err
		}
	}
	if _, err := w.Write([]byte("]}")); err != nil {
		return err
	}
	return nil
}

// DecodeJSON implements lowmemjson.Decodable.  {"Gap":n} entries
// decode to a run with ChecksumSize == 0 and are discarded; gaps are
// implied by the retained runs' addresses.
func (sg *SumRunWithGaps[Addr]) DecodeJSON(r io.RuneScanner) error {
	*sg = SumRunWithGaps[Addr]{}
	var name string
	return lowmemjson.DecodeObject(r,
		func(r io.RuneScanner) error {
			return lowmemjson.NewDecoder(r).Decode(&name)
		},
		func(r io.RuneScanner) error {
			switch name {
			case "Addr":
				return lowmemjson.NewDecoder(r).Decode(&sg.Addr)
			case "Size":
				return lowmemjson.NewDecoder(r).Decode(&sg.Size)
			case "Runs":
				return lowmemjson.DecodeArray(r, func(r io.RuneScanner) error {
					var run btrfssum.SumRun[Addr]
					if err := lowmemjson.NewDecoder(r).Decode(&run); err != nil {
						return err
					}
					if run.ChecksumSize > 0 {
						sg.Runs = append(sg.Runs, run)
					}
					return nil
				})
			default:
				return fmt.Errorf("unknown key %q", name)
			}
		})
}
diff --git
a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d new file mode 100644 index 0000000..9d14adf --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/593a5dd328ee86bac6820e3f98383aebadd2af3a22aa248546a228b08451c30d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("1") diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 new file mode 100644 index 0000000..269f061 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/6bd9babbebf7eb78814e1f3425bfa6ccd6bfd42824a26e8cbd63b5117934e600 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("0") diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 new file mode 100644 index 0000000..b8f1562 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/84ed65595ad05a58e293dbf423c1a816b697e2763a29d7c37aa476d6eef6fd60 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("0") +[]byte("") diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 new file mode 100644 index 0000000..be67506 --- /dev/null +++ 
b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/9be40f71bc49b1b5c3b8e08d58d0f69cc22aefb12fd9b5931e49ff0e41953738 @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\xde\xdb!") +[]byte("\xde\xdb") diff --git a/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d new file mode 100644 index 0000000..c3bfa37 --- /dev/null +++ b/cmd/btrfs-rec/inspect/rebuildmappings/testdata/fuzz/FuzzIndexAll/e43317ec61b0da9627d7b2fc0237d369082a6bf5dfe39e05c113b42ff6218d5d @@ -0,0 +1,3 @@ +go test fuzz v1 +[]byte("\x10\x10\x15") +[]byte("\x10\x15") |