// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package rebuildnodes

import (
	"context"
	"fmt"
	"sort"

	"github.com/datawire/dlib/dlog"

	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsitem"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsprim"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfssum"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfstree"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfs/btrfsvol"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildmappings"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/graph"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsinspect/rebuildnodes/uuidmap"
	"git.lukeshu.com/btrfs-progs-ng/lib/btrfsprogs/btrfsutil"
	"git.lukeshu.com/btrfs-progs-ng/lib/containers"
	"git.lukeshu.com/btrfs-progs-ng/lib/maps"
)

type Rebuilder struct {
	raw   *btrfs.FS
	inner interface {
		btrfstree.TreeOperator
		Augment(treeID btrfsprim.ObjID, nodeAddr btrfsvol.LogicalAddr) ([]btrfsprim.Key, error)
	}
	sb btrfstree.Superblock

	graph        graph.Graph
	uuidMap      uuidmap.UUIDMap
	csums        containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum]
	orphans      containers.Set[btrfsvol.LogicalAddr]
	leaf2orphans map[btrfsvol.LogicalAddr]containers.Set[btrfsvol.LogicalAddr]
	key2leaf     containers.SortedMap[keyAndTree, btrfsvol.LogicalAddr]

	augments map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr]

	pendingAugments map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int
}

func RebuildNodes(ctx context.Context, fs *btrfs.FS, nodeScanResults btrfsinspect.ScanDevicesResult) (map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr], error) {
	scanData, err := ScanDevices(ctx, fs, nodeScanResults) // ScanDevices does its own logging
	if err != nil {
		return nil, err
	}

	dlog.Info(ctx, "Reading superblock...")
	sb, err := fs.Superblock()
	if err != nil {
		return nil, err
	}

	dlog.Info(ctx, "Indexing checksums...")
	csums, _ := rebuildmappings.ExtractLogicalSums(ctx, nodeScanResults)
	if csums == nil {
		csums = new(containers.RBTree[containers.NativeOrdered[btrfsvol.LogicalAddr], btrfsinspect.SysExtentCSum])
	}

	dlog.Info(ctx, "Indexing orphans...")
	orphans, leaf2orphans, key2leaf, err := indexOrphans(ctx, fs, *sb, *scanData.nodeGraph)
	if err != nil {
		return nil, err
	}

	dlog.Info(ctx, "Rebuilding node tree...")
	o := &Rebuilder{
		raw:   fs,
		inner: btrfsutil.NewBrokenTrees(ctx, fs),
		sb:    *sb,

		graph:        *scanData.nodeGraph,
		uuidMap:      *scanData.uuidMap,
		csums:        *csums,
		orphans:      orphans,
		leaf2orphans: leaf2orphans,
		key2leaf:     *key2leaf,

		augments: make(map[btrfsprim.ObjID]containers.Set[btrfsvol.LogicalAddr]),
	}
	if err := o.rebuild(ctx); err != nil {
		return nil, err
	}

	return o.augments, nil
}

func (o *Rebuilder) ioErr(ctx context.Context, err error) {
	err = fmt.Errorf("should not happen: i/o error: %w", err)
	dlog.Error(ctx, err)
	panic(err)
}

func (o *Rebuilder) rebuild(ctx context.Context) error {
	passNum := 0
	dlog.Infof(ctx, "... pass %d: scanning for implied items", passNum)
	o.pendingAugments = make(map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int)
	btrfsutil.WalkAllTrees(ctx, o.inner, btrfsutil.WalkAllTreesHandler{
		Err: func(*btrfsutil.WalkError) {},
		TreeWalkHandler: btrfstree.TreeWalkHandler{
			Item: func(path btrfstree.TreePath, item btrfstree.Item) error {
				handleItem(o, ctx, path[0].FromTree, item)
				return nil
			},
		},
	})
	for len(o.pendingAugments) > 0 {
		// Apply the augments, keeping track of what keys are added to what tree.
		dlog.Infof(ctx, "... pass %d: augmenting trees to add implied items", passNum)
		newKeys := make(map[btrfsprim.ObjID][]btrfsprim.Key)
		for _, treeID := range maps.SortedKeys(o.pendingAugments) {
			dlog.Infof(ctx, "... ... augmenting tree %v:", treeID)
			treeAugments := o.resolveTreeAugments(ctx, o.pendingAugments[treeID])
			for _, nodeAddr := range maps.SortedKeys(treeAugments) {
				added, err := o.inner.Augment(treeID, nodeAddr)
				if err != nil {
					dlog.Errorf(ctx, "error augmenting: %v", err)
					continue
				}
				newKeys[treeID] = append(newKeys[treeID], added...)

				set := o.augments[treeID]
				if set == nil {
					set = make(containers.Set[btrfsvol.LogicalAddr])
					o.augments[treeID] = set
				}
				set.Insert(nodeAddr)
			}
		}
		// Clear the list of pending augments.
		o.pendingAugments = make(map[btrfsprim.ObjID][]map[btrfsvol.LogicalAddr]int)
		passNum++
		// Call handleItem() for each of the added keys.
		dlog.Infof(ctx, "... pass %d: scanning for implied items", passNum)
		for _, treeID := range maps.SortedKeys(newKeys) {
			for _, key := range newKeys[treeID] {
				item, err := o.inner.TreeLookup(treeID, key)
				if err != nil {
					o.ioErr(ctx, fmt.Errorf("error looking up already-inserted item: tree=%v key=%v: %w",
						treeID, key, err))
				}
				handleItem(o, ctx, treeID, item)
			}
		}
	}
	return nil
}

func (o *Rebuilder) resolveTreeAugments(ctx context.Context, listsWithDistances []map[btrfsvol.LogicalAddr]int) containers.Set[btrfsvol.LogicalAddr] {
	distances := make(map[btrfsvol.LogicalAddr]int)
	generations := make(map[btrfsvol.LogicalAddr]btrfsprim.Generation)
	lists := make([]containers.Set[btrfsvol.LogicalAddr], len(listsWithDistances))
	for i, listWithDistances := range listsWithDistances {
		lists[i] = make(containers.Set[btrfsvol.LogicalAddr], len(listWithDistances))
		for item, dist := range listWithDistances {
			lists[i].Insert(item)
			distances[item] = dist
			generations[item] = o.graph.Nodes[item].Generation
		}
	}

	// Define an algorithm that takes several lists of items, and returns a
	// set of those items such that each input list contains zero or one of
	// the items from your return set.  The same item may appear in multiple
	// of the input lists.
	//
	// > Example 1: Given the input lists
	// >
	// >     0: [A, B]
	// >     2: [A, C]
	// >
	// > legal solutions would be `[]`, `[A]`, `[B]`, `[C]`, or `[B, C]`.  It
	// > would not be legal to return `[A, B]` or `[A, C]`.
	//
	// > Example 2: Given the input lists
	// >
	// >     1: [A, B]
	// >     2: [A]
	// >     3: [B]
	// >
	// > legal solution woudl be `[]`, `[A]` or `[B]`.  It would not be legal
	// > to return `[A, B]`.
	//
	// The algorithm should optimize for the following goals:
	//
	//  - We prefer that each input list have an item in the return set.
	//
	//    > In Example 1, while `[]`, `[B]`, and `[C]` are permissable
	//    > solutions, they are not optimal, because one or both of the input
	//    > lists are not represented.
	//    >
	//    > It may be the case that it is not possible to represent all lists
	//    > in the result; in Example 2, either list 2 or list 3 must be
	//    > unrepresented.
	//
	//  - Each item has a non-negative scalar "distance" score, we prefer
	//    lower distances.  Distance scores are comparable; 0 is preferred,
	//    and a distance of 4 is twice as bad as a distance of 2.
	//
	//  - Each item has a "generation" score, we prefer higher generations.
	//    Generation scores should not be treated as a linear scale; the
	//    magnitude of deltas is meaningless; only the sign of a delta is
	//    meaningful.
	//
	//    > So it would be wrong to say something like
	//    >
	//    >     desirability = (-a*distance) + (b*generation)       // for some constants `a` and `b`
	//    >
	//    > because `generation` can't be used that way
	//
	//  - We prefer items that appear in more lists over items that appear in
	//    fewer lists.
	//
	// The relative priority of these 4 goals is undefined; preferrably the
	// algorithm should be defined in a way that makes it easy to adjust the
	// relative priorities.

	ret := make(containers.Set[btrfsvol.LogicalAddr])
	illegal := make(containers.Set[btrfsvol.LogicalAddr]) // cannot-be-accepted and already-accepted
	accept := func(item btrfsvol.LogicalAddr) {
		ret.Insert(item)
		for _, list := range lists {
			if list.Has(item) {
				illegal.InsertFrom(list)
			}
		}
	}

	counts := make(map[btrfsvol.LogicalAddr]int)
	for _, list := range lists {
		for item := range list {
			counts[item] = counts[item] + 1
		}
	}

	sortedItems := maps.Keys(distances)
	sort.Slice(sortedItems, func(i, j int) bool {
		iItem, jItem := sortedItems[i], sortedItems[j]
		if counts[iItem] != counts[jItem] {
			return counts[iItem] > counts[jItem] // reverse this check; higher counts should sort lower
		}
		if distances[iItem] != distances[jItem] {
			return distances[iItem] < distances[jItem]
		}
		if generations[iItem] != generations[jItem] {
			return generations[iItem] > generations[jItem] // reverse this check; higher generations should sort lower
		}
		return iItem < jItem // laddr is as good a tiebreaker as anything
	})
	for _, item := range sortedItems {
		if !illegal.Has(item) {
			accept(item)
		}
	}

	for i, list := range lists {
		dlog.Infof(ctx, "... ... ... %d: %v: %v", i, list.Intersection(ret).TakeOne(), maps.SortedKeys(list))
	}

	return ret
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// _NodeFile is a subset of btrfstree.NodeFile.
type _NodeFile interface {
	ParentTree(btrfsprim.ObjID) (btrfsprim.ObjID, bool)
}

func treeDistance(fs _NodeFile, tree, leaf btrfsprim.ObjID) (int, bool) {
	dist := 0
	for {
		if tree == leaf {
			return dist, true
		}

		parentTree, ok := fs.ParentTree(tree)
		if !ok {
			// Failed to look up parent info.
			return 0, false
		}
		if parentTree == 0 {
			// End of the line.
			return 0, false
		}

		tree = parentTree
		dist++
	}
}

func (o *Rebuilder) wantAugment(ctx context.Context, treeID btrfsprim.ObjID, choices containers.Set[btrfsvol.LogicalAddr]) {
	choicesWithDist := make(map[btrfsvol.LogicalAddr]int)
	for choice := range choices {
		if dist, ok := treeDistance(o.uuidMap, treeID, o.graph.Nodes[choice].Owner); ok {
			choicesWithDist[choice] = dist
		}
	}
	if len(choicesWithDist) == 0 {
		dlog.Errorf(ctx, "augment(tree=%v): could not find wanted item", treeID)
		return
	}
	dlog.Infof(ctx, "augment(tree=%v): %v", treeID, maps.SortedKeys(choicesWithDist))
	o.pendingAugments[treeID] = append(o.pendingAugments[treeID], choicesWithDist)
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// fsErr implements rebuildCallbacks.
func (o *Rebuilder) fsErr(ctx context.Context, e error) {
	dlog.Errorf(ctx, "filesystem error: %v", e)
}

// want implements rebuildCallbacks.
func (o *Rebuilder) want(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType) {
	// check if we already have it

	tgt := btrfsprim.Key{
		ObjectID: objID,
		ItemType: typ,
	}
	if _, err := o.inner.TreeSearch(treeID, func(key btrfsprim.Key, _ uint32) int {
		key.Offset = 0
		return tgt.Cmp(key)
	}); err == nil {
		return
	}

	// OK, we need to insert it

	ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key={%v %v ?}", treeID, objID, typ))
	wants := make(containers.Set[btrfsvol.LogicalAddr])
	o.key2leaf.Subrange(
		func(k keyAndTree, _ btrfsvol.LogicalAddr) int { k.Key.Offset = 0; return tgt.Cmp(k.Key) },
		func(_ keyAndTree, v btrfsvol.LogicalAddr) bool {
			wants.InsertFrom(o.leaf2orphans[v])
			return true
		})
	o.wantAugment(ctx, treeID, wants)
}

// wantOff implements rebuildCallbacks.
func (o *Rebuilder) wantOff(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, off uint64) {
	// check if we already have it

	tgt := btrfsprim.Key{
		ObjectID: objID,
		ItemType: typ,
		Offset:   off,
	}
	if _, err := o.inner.TreeLookup(treeID, tgt); err == nil {
		return
	}

	// OK, we need to insert it

	ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key=%v", treeID, tgt))
	wants := make(containers.Set[btrfsvol.LogicalAddr])
	o.key2leaf.Subrange(
		func(k keyAndTree, _ btrfsvol.LogicalAddr) int { return tgt.Cmp(k.Key) },
		func(_ keyAndTree, v btrfsvol.LogicalAddr) bool {
			wants.InsertFrom(o.leaf2orphans[v])
			return true
		})
	o.wantAugment(ctx, treeID, wants)
}

// wantFunc implements rebuildCallbacks.
func (o *Rebuilder) wantFunc(ctx context.Context, treeID btrfsprim.ObjID, objID btrfsprim.ObjID, typ btrfsprim.ItemType, fn func(btrfsitem.Item) bool) {
	// check if we already have it

	tgt := btrfsprim.Key{
		ObjectID: objID,
		ItemType: typ,
	}
	items, _ := o.inner.TreeSearchAll(treeID, func(key btrfsprim.Key, _ uint32) int {
		key.Offset = 0
		return tgt.Cmp(key)
	})
	for _, item := range items {
		if fn(item.Body) {
			return
		}
	}

	// OK, we need to insert it

	ctx = dlog.WithField(ctx, "want_key", fmt.Sprintf("tree=%v key=%v +func", treeID, tgt))
	wants := make(containers.Set[btrfsvol.LogicalAddr])
	o.key2leaf.Subrange(
		func(k keyAndTree, _ btrfsvol.LogicalAddr) int { k.Key.Offset = 0; return tgt.Cmp(k.Key) },
		func(k keyAndTree, v btrfsvol.LogicalAddr) bool {
			nodeRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](o.raw, o.sb, v, btrfstree.NodeExpectations{
				LAddr:      containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: v},
				Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: o.graph.Nodes[v].Generation},
			})
			if err != nil {
				o.ioErr(ctx, err)
			}
			for _, item := range nodeRef.Data.BodyLeaf {
				if k.Key == item.Key && fn(item.Body) {
					wants.InsertFrom(o.leaf2orphans[v])
				}
			}
			return true
		})
	o.wantAugment(ctx, treeID, wants)
}

// func implements rebuildCallbacks.
//
// interval is [beg, end)
func (o *Rebuilder) wantCSum(ctx context.Context, beg, end btrfsvol.LogicalAddr) {
	for beg < end {
		// check if we already have it
		if run, err := btrfs.LookupCSum(o.inner, o.sb.ChecksumType, beg); err == nil {
			// we already have it
			beg = run.Addr.Add(run.Size())
		} else {
			// we need to insert it
			ctx := dlog.WithField(ctx, "want_key", fmt.Sprintf("csum for laddr=%v", beg))
			rbNode := o.csums.Search(func(item btrfsinspect.SysExtentCSum) int {
				switch {
				case item.Sums.Addr > beg:
					return -1
				case item.Sums.Addr.Add(item.Sums.Size()) <= beg:
					return 1
				default:
					return 0
				}

			})
			if rbNode == nil {
				o.wantAugment(ctx, btrfsprim.CSUM_TREE_OBJECTID, nil) // log an error
				beg += btrfssum.BlockSize
				continue
			}
			run := rbNode.Value.Sums
			key := keyAndTree{
				Key:    rbNode.Value.Key,
				TreeID: btrfsprim.CSUM_TREE_OBJECTID,
			}
			leaf, ok := o.key2leaf.Load(key)
			if !ok {
				// This is a panic because if we found it in `o.csums` then it has
				// to be in some Node, and if we didn't find it from
				// btrfs.LookupCSum(), then that Node must be an orphan.
				panic(fmt.Errorf("should not happen: no orphan contains %v", key.Key))
			}
			o.wantAugment(ctx, key.TreeID, o.leaf2orphans[leaf])

			beg = run.Addr.Add(run.Size())
		}
	}
}

// wantFileExt implements rebuildCallbacks.
func (o *Rebuilder) wantFileExt(ctx context.Context, treeID btrfsprim.ObjID, ino btrfsprim.ObjID, size int64) {
	min := btrfsprim.Key{
		ObjectID: ino,
		ItemType: btrfsitem.EXTENT_DATA_KEY,
		Offset:   0,
	}
	max := btrfsprim.Key{
		ObjectID: ino,
		ItemType: btrfsitem.EXTENT_DATA_KEY,
		Offset:   uint64(size - 1),
	}
	exts, _ := o.inner.TreeSearchAll(treeID, func(key btrfsprim.Key, _ uint32) int {
		switch {
		case min.Cmp(key) < 0:
			return 1
		case max.Cmp(key) > 0:
			return -1
		default:
			return 0
		}
	})

	type gap struct {
		// range is [Beg,End)
		Beg, End int64
	}
	gaps := &containers.RBTree[containers.NativeOrdered[int64], gap]{
		KeyFn: func(gap gap) containers.NativeOrdered[int64] {
			return containers.NativeOrdered[int64]{Val: gap.Beg}
		},
	}
	gaps.Insert(gap{
		Beg: 0,
		End: size,
	})
	for _, ext := range exts {
		switch extBody := ext.Body.(type) {
		case btrfsitem.FileExtent:
			extBeg := int64(ext.Key.Offset)
			extSize, err := extBody.Size()
			if err != nil {
				o.fsErr(ctx, fmt.Errorf("FileExtent: tree=%v key=%v: %w", treeID, ext.Key, err))
				continue
			}
			extEnd := extBeg + extSize
			overlappingGaps := gaps.SearchRange(func(gap gap) int {
				switch {
				case gap.End <= extBeg:
					return 1
				case extEnd <= gap.Beg:
					return -1
				default:
					return 0
				}
			})
			if len(overlappingGaps) == 0 {
				continue
			}
			beg := overlappingGaps[0].Beg
			end := overlappingGaps[len(overlappingGaps)-1].End
			for _, gap := range overlappingGaps {
				gaps.Delete(containers.NativeOrdered[int64]{Val: gap.Beg})
			}
			if beg < extBeg {
				gaps.Insert(gap{
					Beg: beg,
					End: extBeg,
				})
			}
			if end > extEnd {
				gaps.Insert(gap{
					Beg: extEnd,
					End: end,
				})
			}
		case btrfsitem.Error:
			o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", treeID, ext.Key, extBody.Err))
		default:
			// This is a panic because the item decoder should not emit EXTENT_DATA
			// items as anything but btrfsitem.FileExtent or btrfsitem.Error without
			// this code also being updated.
			panic(fmt.Errorf("should not happen: EXTENT_DATA item has unexpected type: %T", extBody))
		}
	}
	_ = gaps.Walk(func(rbNode *containers.RBNode[gap]) error {
		gap := rbNode.Value
		min := btrfsprim.Key{
			ObjectID: ino,
			ItemType: btrfsitem.EXTENT_DATA_KEY,
			Offset:   0,
		}
		max := btrfsprim.Key{
			ObjectID: ino,
			ItemType: btrfsitem.EXTENT_DATA_KEY,
			Offset:   uint64(gap.End - 1),
		}
		ctx := dlog.WithField(ctx, "want_key", fmt.Sprintf("file extent for tree=%v inode=%v bytes [%v, %v)", treeID, ino, gap.Beg, gap.End))
		wants := make(containers.Set[btrfsvol.LogicalAddr])
		o.key2leaf.Subrange(
			func(k keyAndTree, _ btrfsvol.LogicalAddr) int {
				switch {
				case min.Cmp(k.Key) < 0:
					return 1
				case max.Cmp(k.Key) > 0:
					return -1
				default:
					return 0
				}
			},
			func(k keyAndTree, v btrfsvol.LogicalAddr) bool {
				nodeRef, err := btrfstree.ReadNode[btrfsvol.LogicalAddr](o.raw, o.sb, v, btrfstree.NodeExpectations{
					LAddr:      containers.Optional[btrfsvol.LogicalAddr]{OK: true, Val: v},
					Generation: containers.Optional[btrfsprim.Generation]{OK: true, Val: o.graph.Nodes[v].Generation},
				})
				if err != nil {
					o.ioErr(ctx, fmt.Errorf("error reading previously read node@%v: %w", v, err))
				}
				for _, item := range nodeRef.Data.BodyLeaf {
					if k.Key != item.Key {
						continue
					}
					switch itemBody := item.Body.(type) {
					case btrfsitem.FileExtent:
						itemBeg := int64(item.Key.Offset)
						itemSize, err := itemBody.Size()
						if err != nil {
							o.fsErr(ctx, fmt.Errorf("FileExtent: tree=%v key=%v: %w", treeID, item.Key, err))
							continue
						}
						itemEnd := itemBeg + itemSize
						// We're being and "wanting" any extent that has any overlap with the
						// gap.  But maybe instead we sould only want extents that are
						// *entirely* within the gap.  I'll have to run it on real filesystems
						// to see what works better.
						//
						// TODO(lukeshu): Re-evaluate whether being greedy here is the right
						// thing.
						if itemEnd > gap.Beg && itemBeg < gap.End {
							wants.InsertFrom(o.leaf2orphans[v])
						}
					case btrfsitem.Error:
						o.fsErr(ctx, fmt.Errorf("error decoding item: tree=%v key=%v: %w", treeID, item.Key, itemBody.Err))
					default:
						// This is a panic because the item decoder should not emit EXTENT_DATA
						// items as anything but btrfsitem.FileExtent or btrfsitem.Error without
						// this code also being updated.
						panic(fmt.Errorf("should not happen: EXTENT_DATA item has unexpected type: %T", itemBody))
					}
				}
				return true
			})
		o.wantAugment(ctx, treeID, wants)
		return nil
	})
}