From 0057ac685125aea5cf06dfd8eeaa7c7d52e64dfa Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 15 Jul 2022 14:36:47 -0600
Subject: wip

---
 lib/btrfsprogs/btrfsinspect/scanforextents/csum.go | 125 ++++++++++++
 lib/btrfsprogs/btrfsinspect/scanforextents/gaps.go |  82 ++++++++
 .../btrfsinspect/scanforextents/scanforextents.go  | 214 +++++++++++++++++++++
 .../btrfsinspect/scanforextents/scanfornodes.go    | 101 ++++++++++
 4 files changed, 522 insertions(+)
 create mode 100644 lib/btrfsprogs/btrfsinspect/scanforextents/csum.go
 create mode 100644 lib/btrfsprogs/btrfsinspect/scanforextents/gaps.go
 create mode 100644 lib/btrfsprogs/btrfsinspect/scanforextents/scanforextents.go
 create mode 100644 lib/btrfsprogs/btrfsinspect/scanforextents/scanfornodes.go

(limited to 'lib/btrfsprogs/btrfsinspect/scanforextents')

diff --git a/lib/btrfsprogs/btrfsinspect/scanforextents/csum.go b/lib/btrfsprogs/btrfsinspect/scanforextents/csum.go
new file mode 100644
index 0000000..7944497
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/scanforextents/csum.go
@@ -0,0 +1,125 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package scanforextents
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	"github.com/datawire/dlib/dlog"
+
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+)
+
+const csumBlockSize = 4 * 1024
+
+func ChecksumLogical(fs btrfs.Trees, alg btrfssum.CSumType, laddr btrfsvol.LogicalAddr) (btrfssum.CSum, error) {
+	var dat [csumBlockSize]byte
+	if _, err := fs.ReadAt(dat[:], laddr); err != nil {
+		return btrfssum.CSum{}, err
+	}
+	return alg.Sum(dat[:])
+}
+
+func ChecksumPhysical(dev *btrfs.Device, alg btrfssum.CSumType, paddr btrfsvol.PhysicalAddr) (btrfssum.CSum, error) {
+	var dat [csumBlockSize]byte
+	if _, err := dev.ReadAt(dat[:], paddr); err != nil {
+		return btrfssum.CSum{}, err
+	}
+	return alg.Sum(dat[:])
+}
+
+func ChecksumQualifiedPhysical(fs *btrfs.FS, alg btrfssum.CSumType, paddr btrfsvol.QualifiedPhysicalAddr) (btrfssum.CSum, error) {
+	dev := fs.LV.PhysicalVolumes()[paddr.Dev]
+	if dev == nil {
+		return btrfssum.CSum{}, fmt.Errorf("no such device_id=%v", paddr.Dev)
+	}
+	return ChecksumPhysical(dev, alg, paddr.Addr)
+}
+
+type shortSum string
+
+func readCSumTree(ctx context.Context, fs btrfs.Trees) map[shortSum][]btrfsvol.LogicalAddr {
+	sb, _ := fs.Superblock()
+
+	sum2laddrs := make(map[shortSum][]btrfsvol.LogicalAddr)
+	var cntUnmapped, cntErr, cntMismatch, cntValid int
+	fs.TreeWalk(ctx, btrfs.CSUM_TREE_OBJECTID,
+		func(err *btrfs.TreeError) {
+			dlog.Error(ctx, err)
+		},
+		btrfs.TreeWalkHandler{
+			Item: func(path btrfs.TreePath, item btrfs.Item) error {
+				if item.Key.ItemType != btrfsitem.EXTENT_CSUM_KEY {
+					return nil
+				}
+				body := item.Body.(btrfsitem.ExtentCSum)
+
+				for i, treeSum := range body.Sums {
+					laddr := btrfsvol.LogicalAddr(item.Key.Offset) + (btrfsvol.LogicalAddr(i) * csumBlockSize)
+					readSum, err := ChecksumLogical(fs, sb.ChecksumType, laddr)
+					if err != nil {
+						if errors.Is(err, btrfsvol.ErrCouldNotMap) {
+							cntUnmapped++
+							treeShortSum := shortSum(treeSum[:body.ChecksumSize])
+							sum2laddrs[treeShortSum] = append(sum2laddrs[treeShortSum], laddr)
+							continue
+						}
+						dlog.Error(ctx, err)
+						cntErr++
+						continue
+					}
+					if readSum != treeSum {
+						dlog.Errorf(ctx, "checksum mismatch at laddr=%v: CSUM_TREE=%v != read=%v",
+							laddr, treeSum, readSum)
+						cntMismatch++
+						continue
+					}
+					cntValid++
+				}
+				return nil
+			},
+		},
+	)
+
+	total := cntErr + cntUnmapped + cntMismatch + cntValid
+	dlog.Infof(ctx, "  checksum errors          : %v", cntErr)
+	dlog.Infof(ctx, "  unmapped checksums       : %v", cntUnmapped)
+	dlog.Infof(ctx, "  mismatched checksums     : %v", cntMismatch)
+	dlog.Infof(ctx, "  valid checksums          : %v", cntValid)
+	dlog.Infof(ctx, "  -------------------------:")
+	dlog.Infof(ctx, "  total checksums          : %v", total)
+	dlog.Infof(ctx, "  distinct unmapped        : %v", len(sum2laddrs))
+
+	return sum2laddrs
+}
+
+func LookupCSum(fs btrfs.Trees, alg btrfssum.CSumType, laddr btrfsvol.LogicalAddr) (btrfssum.CSum, error) {
+	item, err := fs.TreeSearch(btrfs.CSUM_TREE_OBJECTID, func(key btrfs.Key, size uint32) int {
+		itemBeg := btrfsvol.LogicalAddr(key.ObjectID)
+		numSums := int64(size) / int64(alg.Size())
+		itemEnd := itemBeg + btrfsvol.LogicalAddr(numSums*csumBlockSize)
+		switch {
+		case itemEnd <= laddr:
+			return 1
+		case laddr < itemBeg:
+			return -1
+		default:
+			return 0
+		}
+	})
+	if err != nil {
+		return btrfssum.CSum{}, err
+	}
+	body, ok := item.Body.(btrfsitem.ExtentCSum)
+	if !ok {
+		return btrfssum.CSum{}, fmt.Errorf("item body is %T not ExtentCSum", item.Body)
+	}
+	return body.Sums[int(laddr-btrfsvol.LogicalAddr(item.Key.ObjectID))/csumBlockSize], nil
+}
diff --git a/lib/btrfsprogs/btrfsinspect/scanforextents/gaps.go b/lib/btrfsprogs/btrfsinspect/scanforextents/gaps.go
new file mode 100644
index 0000000..90351a5
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/scanforextents/gaps.go
@@ -0,0 +1,82 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package scanforextents
+
+import (
+	"context"
+	"sort"
+
+	"golang.org/x/exp/constraints"
+
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+)
+
+type PhysicalGap struct {
+	Beg, End btrfsvol.PhysicalAddr
+}
+
+func ListPhysicalGaps(fs *btrfs.FS) map[btrfsvol.DeviceID][]PhysicalGap {
+	gaps := make(map[btrfsvol.DeviceID][]PhysicalGap)
+	pos := make(map[btrfsvol.DeviceID]btrfsvol.PhysicalAddr)
+	mappings := fs.LV.Mappings()
+	sort.Slice(mappings, func(i, j int) bool {
+		return mappings[i].PAddr.Cmp(mappings[j].PAddr) < 0
+	})
+	for _, mapping := range mappings {
+		if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr {
+			gaps[mapping.PAddr.Dev] = append(gaps[mapping.PAddr.Dev], PhysicalGap{
+				Beg: pos[mapping.PAddr.Dev],
+				End: mapping.PAddr.Addr,
+			})
+		}
+		if pos[mapping.PAddr.Dev] < mapping.PAddr.Addr.Add(mapping.Size) {
+			pos[mapping.PAddr.Dev] = mapping.PAddr.Addr.Add(mapping.Size)
+		}
+	}
+	for devID, dev := range fs.LV.PhysicalVolumes() {
+		devSize := dev.Size()
+		if pos[devID] < devSize {
+			gaps[devID] = append(gaps[devID], PhysicalGap{
+				Beg: pos[devID],
+				End: devSize,
+			})
+		}
+	}
+	return gaps
+}
+
+func roundUp[T constraints.Integer](x, multiple T) T {
+	return ((x + multiple - 1) / multiple) * multiple
+}
+
+func WalkGapsOneDev(ctx context.Context, dev *btrfs.Device,
+	gaps []PhysicalGap, blockSize btrfsvol.PhysicalAddr,
+	progress func(cur, total int64),
+	main func(btrfsvol.PhysicalAddr) error,
+) error {
+	var totalBlocks int64
+	for _, gap := range gaps {
+		for paddr := roundUp(gap.Beg, blockSize); paddr+blockSize <= gap.End; paddr += blockSize {
+			totalBlocks++
+		}
+	}
+
+	var curBlock int64
+	for _, gap := range gaps {
+		for paddr := roundUp(gap.Beg, blockSize); paddr+blockSize <= gap.End; paddr += blockSize {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			progress(curBlock, totalBlocks)
+			curBlock++
+			if err := main(paddr); err != nil {
+				return err
+			}
+		}
+	}
+	progress(curBlock, totalBlocks)
+	return nil
+}
diff --git a/lib/btrfsprogs/btrfsinspect/scanforextents/scanforextents.go b/lib/btrfsprogs/btrfsinspect/scanforextents/scanforextents.go
new file mode 100644
index 0000000..1537fb5
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/scanforextents/scanforextents.go
@@ -0,0 +1,214 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package scanforextents
+
+import (
+	"context"
+	"sync"
+
+	"github.com/datawire/dlib/dgroup"
+	"github.com/datawire/dlib/dlog"
+
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsutil"
+	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
+)
+
+func ScanForExtents(ctx context.Context, fs *btrfs.FS, blockGroups *BlockGroupTree) error {
+	treeReader := btrfsutil.NewBrokenTrees(ctx, fs)
+
+	dlog.Info(ctx, "Reading checksum tree...")
+	sum2laddrs := readCSumTree(ctx, treeReader)
+	if len(sum2laddrs) == 0 {
+		dlog.Info(ctx, "No unmapped checksums")
+		return nil
+	}
+
+	devs := fs.LV.PhysicalVolumes()
+	gaps := ListPhysicalGaps(fs)
+
+	newMappings := &ExtentMappings{
+		InFS:          treeReader,
+		InLV:          &fs.LV,
+		InSum2laddrs:  sum2laddrs,
+		InBlockGroups: blockGroups,
+	}
+
+	grp := dgroup.NewGroup(ctx, dgroup.GroupConfig{})
+	for devID := range gaps {
+		dev := devs[devID]
+		devGaps := gaps[devID]
+		grp.Go(dev.Name(), func(ctx context.Context) error {
+			return newMappings.ScanOneDev(ctx, dev, devGaps)
+		})
+	}
+	if err := grp.Wait(); err != nil {
+		return err
+	}
+
+	for laddr, mappings := range newMappings.OutSum2mappings {
+		if len(mappings) > 1 {
+			dlog.Errorf(ctx, "multiple possibilities for laddr=%v :", laddr)
+			for _, mapping := range mappings {
+				dlog.Errorf(ctx, "  - %#v", *mapping)
+			}
+			continue
+		}
+		if err := fs.LV.AddMapping(*mappings[0]); err != nil {
+			dlog.Error(ctx, err)
+		}
+	}
+
+	return nil
+}
+
+type ExtentMappings struct {
+	// input
+	InFS          btrfs.Trees
+	InLV          *btrfsvol.LogicalVolume[*btrfs.Device]
+	InSum2laddrs  map[shortSum][]btrfsvol.LogicalAddr
+	InBlockGroups *BlockGroupTree
+
+	// state
+	initOnce           sync.Once
+	initErr            error
+	alg                btrfssum.CSumType
+	internedMappingsMu sync.Mutex
+	internedMappings   map[btrfsvol.Mapping]*btrfsvol.Mapping
+
+	// output
+	sum2lock        map[shortSum]*sync.Mutex
+	OutSum2mappings map[shortSum][]*btrfsvol.Mapping
+}
+
+func (em *ExtentMappings) init() error {
+	em.initOnce.Do(func() {
+		sb, err := em.InFS.Superblock()
+		if err != nil {
+			em.initErr = err
+			return
+		}
+		em.alg = sb.ChecksumType
+		em.internedMappings = make(map[btrfsvol.Mapping]*btrfsvol.Mapping)
+		em.sum2lock = make(map[shortSum]*sync.Mutex, len(em.InSum2laddrs))
+		for sum := range em.InSum2laddrs {
+			em.sum2lock[sum] = new(sync.Mutex)
+		}
+		em.OutSum2mappings = make(map[shortSum][]*btrfsvol.Mapping)
+	})
+	return em.initErr
+}
+
+func (em *ExtentMappings) considerMapping(dev *btrfs.Device, laddr btrfsvol.LogicalAddr, paddr btrfsvol.QualifiedPhysicalAddr) (btrfsvol.Mapping, bool) {
+	blockgroup := LookupBlockGroup(em.InBlockGroups, laddr, csumBlockSize)
+	if blockgroup == nil {
+		return btrfsvol.Mapping{
+			LAddr: laddr,
+			PAddr: paddr,
+			Size:  csumBlockSize,
+		}, true
+	}
+	mapping := btrfsvol.Mapping{
+		LAddr: blockgroup.LAddr,
+		PAddr: btrfsvol.QualifiedPhysicalAddr{
+			Dev:  paddr.Dev,
+			Addr: paddr.Addr.Add(laddr.Sub(blockgroup.LAddr)),
+		},
+		Size:       blockgroup.Size,
+		SizeLocked: true,
+		Flags: containers.Optional[btrfsvol.BlockGroupFlags]{
+			OK:  true,
+			Val: blockgroup.Flags,
+		},
+	}
+	if !em.InLV.CouldAddMapping(mapping) {
+		return btrfsvol.Mapping{}, false
+	}
+
+	for offset := btrfsvol.AddrDelta(0); offset <= mapping.Size; offset += csumBlockSize {
+		expCSum, err := LookupCSum(em.InFS, em.alg, mapping.LAddr.Add(offset))
+		if err != nil {
+			continue
+		}
+		actCSum, err := ChecksumPhysical(dev, em.alg, mapping.PAddr.Addr.Add(offset))
+		if err != nil {
+			return btrfsvol.Mapping{}, false
+		}
+		if actCSum != expCSum {
+			return btrfsvol.Mapping{}, false
+		}
+	}
+	return mapping, true
+}
+
+func (em *ExtentMappings) addMapping(sum shortSum, mapping btrfsvol.Mapping) {
+	em.internedMappingsMu.Lock()
+	interned := em.internedMappings[mapping]
+	if interned == nil {
+		interned = &mapping
+		em.internedMappings[mapping] = interned
+	}
+	em.internedMappingsMu.Unlock()
+
+	em.sum2lock[sum].Lock()
+	em.OutSum2mappings[sum] = append(em.OutSum2mappings[sum], interned)
+	em.sum2lock[sum].Unlock()
+}
+
+func (em *ExtentMappings) ScanOneDev(ctx context.Context, dev *btrfs.Device, gaps []PhysicalGap) error {
+	if err := em.init(); err != nil {
+		return err
+	}
+	devID := func() btrfsvol.DeviceID {
+		sb, _ := dev.Superblock()
+		return sb.DevItem.DevID
+	}()
+
+	dlog.Infof(ctx, "... dev[%q] Scanning for extents...", dev.Name())
+
+	sumSize := em.alg.Size()
+
+	lastProgress := -1
+	return WalkGapsOneDev(ctx, dev, gaps, csumBlockSize,
+		func(curBlock, totalBlocks int64) {
+			pct := int(100 * float64(curBlock) / float64(totalBlocks))
+			if pct != lastProgress || curBlock == totalBlocks {
+				dlog.Infof(ctx, "... dev[%q] scanned %v%%",
+					dev.Name(), pct)
+				lastProgress = pct
+			}
+		},
+		func(paddr btrfsvol.PhysicalAddr) error {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			sum, err := ChecksumPhysical(dev, em.alg, paddr)
+			if err != nil {
+				dlog.Errorf(ctx, "... dev[%s] error: checksumming paddr=%v: %v",
+					dev.Name(), paddr, err)
+				return nil
+			}
+			shortSum := shortSum(sum[:sumSize])
+
+			for _, laddr := range em.InSum2laddrs[shortSum] {
+				if err := ctx.Err(); err != nil {
+					return err
+				}
+				mapping, ok := em.considerMapping(dev, laddr, btrfsvol.QualifiedPhysicalAddr{
+					Dev:  devID,
+					Addr: paddr,
+				})
+				if !ok {
+					continue
+				}
+				em.addMapping(shortSum, mapping)
+			}
+
+			return nil
+		},
+	)
+}
diff --git a/lib/btrfsprogs/btrfsinspect/scanforextents/scanfornodes.go b/lib/btrfsprogs/btrfsinspect/scanforextents/scanfornodes.go
new file mode 100644
index 0000000..99009e6
--- /dev/null
+++ b/lib/btrfsprogs/btrfsinspect/scanforextents/scanfornodes.go
@@ -0,0 +1,101 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package scanforextents
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
+	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
+	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
+)
+
+type NodeScanResults = map[btrfsvol.DeviceID]btrfsinspect.ScanOneDevResult
+
+func ReadNodeScanResults(fs *btrfs.FS, filename string) (*BlockGroupTree, error) {
+	scanResultsBytes, err := os.ReadFile(filename)
+	if err != nil {
+		return nil, err
+	}
+
+	var scanResults NodeScanResults
+	if err := json.Unmarshal(scanResultsBytes, &scanResults); err != nil {
+		return nil, err
+	}
+
+	bgTree, err := ReduceScanResults(fs, scanResults)
+	if err != nil {
+		return nil, err
+	}
+
+	return bgTree, nil
+}
+
+type BlockGroup struct {
+	LAddr btrfsvol.LogicalAddr
+	Size  btrfsvol.AddrDelta
+	Flags btrfsvol.BlockGroupFlags
+}
+
+type BlockGroupTree = containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], BlockGroup]
+
+func LookupBlockGroup(tree *BlockGroupTree, laddr btrfsvol.LogicalAddr, size btrfsvol.AddrDelta) *BlockGroup {
+	a := struct {
+		LAddr btrfsvol.LogicalAddr
+		Size  btrfsvol.AddrDelta
+	}{
+		LAddr: laddr,
+		Size:  size,
+	}
+	node := tree.Search(func(b BlockGroup) int {
+		switch {
+		case a.LAddr.Add(a.Size) <= b.LAddr:
+			// 'a' is wholly to the left of 'b'.
+			return -1
+		case b.LAddr.Add(b.Size) <= a.LAddr:
+			// 'a' is wholly to the right of 'b'.
+			return 1
+		default:
+			// There is some overlap.
+			return 0
+		}
+	})
+	if node == nil {
+		return nil
+	}
+	bg := node.Value
+	return &bg
+}
+
+func ReduceScanResults(fs *btrfs.FS, scanResults NodeScanResults) (*BlockGroupTree, error) {
+	bgSet := make(map[BlockGroup]struct{})
+	for _, found := range scanResults {
+		for _, bg := range found.FoundBlockGroups {
+			bgSet[BlockGroup{
+				LAddr: btrfsvol.LogicalAddr(bg.Key.ObjectID),
+				Size:  btrfsvol.AddrDelta(bg.Key.Offset),
+				Flags: bg.BG.Flags,
+			}] = struct{}{}
+		}
+	}
+	bgTree := &BlockGroupTree{
+		KeyFn: func(bg BlockGroup) containers.NativeOrdered[btrfsvol.LogicalAddr] {
+			return containers.NativeOrdered[btrfsvol.LogicalAddr]{Val: bg.LAddr}
+		},
+	}
+	for bg := range bgSet {
+		if laddr, _ := fs.LV.ResolveAny(bg.LAddr, bg.Size); laddr >= 0 {
+			continue
+		}
+		if LookupBlockGroup(bgTree, bg.LAddr, bg.Size) != nil {
+			return nil, fmt.Errorf("found block groups are inconsistent")
+		}
+		bgTree.Insert(bg)
+	}
+	return bgTree, nil
+}
-- 
cgit v1.2.3-2-g168b