// Copyright (C) 2023 Luke Shumaker
//
// SPDX-License-Identifier: GPL-2.0-or-later

// This file should be reasonably readable from top-to-bottom; I've
// tried to write it in a sort-of "literate programming" style.  That
// makes the file comparatively huge--but don't let that intimidate
// you, it's only huge because of the detailed comments; it's less
// than 300 lines without the comments.

package caching

import (
	"context"
	"fmt"
	"sync"
)

// NewARCache returns a new thread-safe Adaptive Replacement Cache
// (ARC).
//
// Fundamentally, the point of ARC is to combine both recency
// information and frequency information together to form a cache
// policy that is better than both least-recently-used eviction (LRU)
// and least-frequently-used eviction (LFU); and the balance between
// how much weight is given to recency vs frequency is "adaptive"
// based on the characteristics of the current workload.
//
// The Adaptive Replacement Cache is patented by IBM (patent
// US-6,996,676-B2 is set to expire 2024-02-22).
//
// This implementation does NOT make use of the enhancements in ZFS'
// enhanced ARC, which are patented by Sun (now Oracle) (patent
// US-7,469,320-B2 is set to expire 2027-02-13).
//
// This implementation has a few enhancements compared to standard
// ARC:
//
//   - This implementation supports explicitly deleting/invalidating
//     cache entries; the standard ARC algorithm assumes that the only
//     reason an entry is ever removed from the cache is because the
//     cache is full and the entry had to be evicted to make room for
//     a new entry.
//
//   - This implementation supports pinning entries such that they
//     cannot be evicted.  This is one of the enhancements from the
//     enhanced version of ARC used by ZFS, but the version here is
//     not based on the ZFS version.
//
// It is invalid (runtime-panic) to call NewARCache with a
// non-positive capacity or a nil source.
func NewARCache[K comparable, V any](cap int, src Source[K, V]) Cache[K, V] {
	// Pass the parameters in.
	if cap <= 0 {
		panic(fmt.Errorf("caching.NewARCache: invalid capacity: %v", cap))
	}
	if src == nil {
		panic(fmt.Errorf("caching.NewARCache: nil source"))
	}
	ret := &arCache[K, V]{
		cap: cap,
		src: src,
	}
	// Do allocations up-front.  Don't yet worry about what these
	// members are; we'll get to that in the below description of
	// the datatypes.
	ret.liveByName = make(map[K]*LinkedListEntry[arcLiveEntry[K, V]], cap)
	ret.ghostByName = make(map[K]*LinkedListEntry[arcGhostEntry[K]], cap)
	for i := 0; i < cap; i++ {
		ret.unusedLive.Store(new(LinkedListEntry[arcLiveEntry[K, V]]))
		ret.unusedGhost.Store(new(LinkedListEntry[arcGhostEntry[K]]))
	}
	// Return.
	return ret
}

// Related literature:
//
// The comments in this file use terminology from the original ARC
// paper: "ARC: A Self-Tuning, Low Overhead Replacement Cache" by
// N. Megiddo & D. Modha, FAST 2003.
// https://www.usenix.org/legacy/events/fast03/tech/full_papers/megiddo/megiddo.pdf
//
// But, this file tries to be clear enough that it makes sense
// without reading the paper.
//
// If you do read the paper, a difference to keep an eye out for is
// that in order to support "delete", several of the assumptions
// related to DBL(2c) are no longer true.  Specifically, we must
// handle the cache not being full in cases other than a DBL(2c)
// miss; and two of the invariants from Π(c) are no longer true (see
// the bit about invariants below).  Besides the obvious (calls to
// synchronization primitives, things with "del" or "pin" in the
// name), places where the standard ARC algorithm is modified to
// support deletion or pinning should be clearly commented.
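
// For orientation, here is a sketch of how this cache is meant to be
// used.  It is illustrative only: `newMapSource` and `use` are
// hypothetical stand-ins, and the only things assumed about the
// Cache and Source interfaces (which are defined elsewhere in this
// package) are the methods that this file itself implements/calls:
//
//	ctx := context.Background()
//	var src Source[string, []byte] = newMapSource()
//	cache := NewARCache[string, []byte](128, src)
//
//	val := cache.Acquire(ctx, "some-key") // pins "some-key", loading it via src.Load() on a miss
//	use(*val)
//	cache.Release("some-key") // un-pins "some-key", so that it may be evicted again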

// Background / data structures:
//
// `ARC(c)` -- that is, an adaptive replacement cache with capacity
// `c` -- is most reasonably understood in terms of an "imaginary"
// simpler `DBL(2c)` algorithm.
//
// DBL(2c) is a cache that maintains up to 2c entries in a pair of
// lists ordered by LRU/MRU.  These lists are called L₁ or "recent"
// and L₂ or "frequent"; |L₁| + |L₂| ≤ 2c.  L₁/recent is for entries
// that have been used only once "recently", and L₂/frequent is for
// entries that have been used twice or more "recently" (for a
// particular definition of "recently" that we don't need to get
// into).
//
// Aside: Some of the ARC literature calls these lists "MRU" and
// "MFU" for "most {recently,frequently} used", but that's wrong.
// The L₂/frequent list is still ordered by recency of use.
//
// Put another way, L₁/recent is a recency-ordered list of
// recently-but-not-frequently-used entries, while L₂/frequent is a
// recency-ordered list of frequently-used entries.
//
// We'll get to DBL(2c)'s replacement algorithm later; the above
// "shape" is enough of an introduction for now.
//
// Now, the difference of ARC(c) over DBL(2c) is that ARC(c) splits
// those lists into segments; L₁ gets split into a "top" part T₁ and
// a "bottom" part B₁, and similarly L₂ gets split into a "top" part
// T₂ and a "bottom" part B₂.  The "cache" is only made up of T₁ and
// T₂; entries in B₁ and B₂ have been evicted; the 4 lists together
// make up a "directory" of what would be in DBL(2c).  That is:
//
//	cache     = T₁ ∪ T₂
//	directory = T₁ ∪ T₂ ∪ B₁ ∪ B₂
//	L₁        = T₁ ∪ B₁
//	L₂        = T₂ ∪ B₂
//
// Let us say that entries in T₁ or T₂ are "live entries", and
// entries in B₁ or B₂ are "ghost entries".  The ghost entries make
// up a record of recent evictions.  We could use the same struct
// for live entries and ghost entries, and just have everything but
// the key zeroed-out for ghost entries; but to simplify things
// let's just have different structures:

type arcLiveEntry[K comparable, V any] struct {
	key K
	val V

	refs int           // for pinning
	del  chan struct{} // non-nil if a delete is waiting on .refs to drop to zero
}

type arcGhostEntry[K comparable] struct {
	key K
}
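
// Aside: both entry types get stored in LinkedList containers, which
// are defined elsewhere in this package.  For reading this file, the
// assumed shape is roughly the following (a sketch inferred from how
// this file uses them, not the actual definition):
//
//	type LinkedListEntry[T any] struct {
//		List         *LinkedList[T] // the list that the entry is currently in (if any)
//		Older, Newer *LinkedListEntry[T]
//		Value        T
//	}
//
//	type LinkedList[T any] struct {
//		Len            int
//		Oldest, Newest *LinkedListEntry[T]
//	}
//
//	func (l *LinkedList[T]) IsEmpty() bool                // whether Len == 0
//	func (l *LinkedList[T]) Store(e *LinkedListEntry[T])  // insert e as l.Newest, setting e.List
//	func (l *LinkedList[T]) Delete(e *LinkedListEntry[T]) // unlink e from l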

type arCache[K comparable, V any] struct {
	cap int // "c"
	src Source[K, V]

	mu sync.RWMutex

	// Now, the above was a sort of lie for this implementation;
	// for our pinning implementation, we actually segment L₁
	// and L₂ into *three* segments rather than two: the top
	// part is pinned (and thus live) entries, the middle part
	// is live-but-not-pinned entries, and the bottom part is
	// ghost entries.
	//
	// We don't actually care about the order of the pinned
	// entries (the lists are ordered by recency-of-use, and
	// pinned entries are all "in-use", so they're all tied), but
	// it's convenient to maintain the set of them as sorted lists
	// the same as everything else.

	// L₁ / recently-but-not-frequently used entries
	recentPinned LinkedList[arcLiveEntry[K, V]] // top of L₁
	recentLive   LinkedList[arcLiveEntry[K, V]] // "T₁" for "top of L₁" (but really the middle)
	recentGhost  LinkedList[arcGhostEntry[K]]   // "B₁" for "bottom of L₁"

	// L₂ / frequently used entries
	frequentPinned LinkedList[arcLiveEntry[K, V]] // top of L₂
	frequentLive   LinkedList[arcLiveEntry[K, V]] // "T₂" for "top of L₂" (but really the middle)
	frequentGhost  LinkedList[arcGhostEntry[K]]   // "B₂" for "bottom of L₂"

	// Now, where to draw the split between the "live" part and
	// "ghost" part of each list?  We'll use a parameter
	// "p"/recentLiveTarget to decide which list to evict
	// (transition live→ghost) from whenever we need to do an
	// eviction.
	//
	// recentLiveTarget is the "target" len of
	// recentPinned+recentLive.  We allow the actual len to
	// deviate from this if the arCache as a whole isn't
	// over-capacity.  To say it again: recentLiveTarget is used
	// to decide *which* list to evict from, not *whether* we need
	// to evict.
	//
	// recentLiveTarget is always in the range [0, cap]; it never
	// goes negative, and never goes beyond cap.  Adjusting this
	// target is the main way that ARC is "adaptive"; we could
	// instead define a "fixed replacement cache" policy FRC(p, c)
	// that has a static target.  But we'll get into that later.
	recentLiveTarget int // "p"

	// Other book-keeping.

	// For lookups.  The above ordered lists are about
	// eviction/replacement policies, but they don't help us when
	// we want to read something from the cache; we'd have to do
	// an O(n) scan through each list to find the item we're
	// looking for.  So maintain this separate index in order to
	// do O(1) lookups when we want to read from the cache.
	liveByName  map[K]*LinkedListEntry[arcLiveEntry[K, V]]
	ghostByName map[K]*LinkedListEntry[arcGhostEntry[K]]

	// Free lists.  Like the pinned lists, we don't actually care
	// about the order here, it's just convenient to use the same
	// ordered lists.
	unusedLive  LinkedList[arcLiveEntry[K, V]]
	unusedGhost LinkedList[arcGhostEntry[K]]

	// For blocking related to pinning.
	waiters LinkedList[chan struct{}]
}

// Algorithms:
//
// Now that all of our data structures are defined, let's get into
// the algorithms of updating them.
//
// Before getting to the meaty ARC stuff, let's get some
// boring/simple synchronization/blocking primitives out of the way:

// waitForAvail is called before storing something into the cache.
// This is necessary because if the cache is full and all entries are
// pinned, then we won't have anywhere to store the entry until
// something gets unpinned ("Release()d").
func (c *arCache[K, V]) waitForAvail() {
	if !(c.recentLive.IsEmpty() && c.frequentLive.IsEmpty() && c.unusedLive.IsEmpty()) {
		// There is already an available `arcLiveEntry` that
		// we can either use or evict.
		return
	}
	ch := make(chan struct{})
	c.waiters.Store(&LinkedListEntry[chan struct{}]{Value: ch})
	c.mu.Unlock()
	<-ch
	c.mu.Lock()
}

// notifyAvail is called when an entry gets unpinned ("Release()d"),
// and wakes up the oldest (longest-waiting) .waitForAvail() waiter
// (if there is one).
func (c *arCache[K, V]) notifyAvail() {
	waiter := c.waiters.Oldest
	if waiter == nil {
		return
	}
	c.waiters.Delete(waiter)
	close(waiter.Value)
}

// Calling .Delete(k) on an entry that is pinned needs to block until
// the entry is no longer pinned.
func (c *arCache[K, V]) unlockAndWaitForDel(entry *LinkedListEntry[arcLiveEntry[K, V]]) {
	if entry.Value.del == nil {
		entry.Value.del = make(chan struct{})
	}
	ch := entry.Value.del
	c.mu.Unlock()
	<-ch
}

// notifyOfDel unblocks any calls to .Delete(k), notifying them that
// the entry has been deleted and they can now return.
func (c *arCache[K, V]) notifyOfDel(entry *LinkedListEntry[arcLiveEntry[K, V]]) {
	if entry.Value.del != nil {
		close(entry.Value.del)
		entry.Value.del = nil
	}
}
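
// To make the blocking behavior concrete, here is a hypothetical
// timeline (illustrative only; Acquire/Release/Delete are defined
// toward the bottom of this file):
//
//	val := cache.Acquire(ctx, "k") // pins "k"; refs goes 0→1
//	go cache.Delete("k")           // blocks in unlockAndWaitForDel, since "k" is pinned
//	use(*val)
//	cache.Release("k") // refs drops 1→0; Release sees .del is set, performs
//	                   // the deletion, and notifyOfDel unblocks the Delete call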

// OK, now to the main algorithm(s)!
//
// To get this out of the way: Here are the invariants that the
// algorithm(s) must enforce (see the paper for justification):
//
//  from DBL(2c):
//
//    • 0 ≤ |L₁|+|L₂| ≤ 2c
//    • 0 ≤ |L₁| ≤ c
//    • 0 ≤ |L₂| ≤ 2c
//
//  from Π(c):
//
//    Π(c) is the class of policies that ARC(c) belongs to... I
//    suppose that because of the changes we make to support
//    deletion, this implementation doesn't actually belong to
//    Π(c).
//
//    • A.1: The lists T₁, B₁, T₂, and B₂ are all mutually
//      disjoint.
//    • (not true) A.2: If |L₁|+|L₂| < c, then both B₁ and B₂ are
//      empty.  But supporting "delete" invalidates this!
//    • (not true) A.3: If |L₁|+|L₂| ≥ c, then |T₁|+|T₂| = c.  But
//      supporting "delete" invalidates this!
//    • A.4(a): Either (T₁ or B₁ is empty), or (the LRU page in T₁
//      is more recent than the MRU page in B₁).
//    • A.4(b): Either (T₂ or B₂ is empty), or (the LRU page in T₂
//      is more recent than the MRU page in B₂).
//    • A.5: T₁∪T₂ is the set of pages that would be maintained
//      by the cache policy π(c).
//
//    The algorithm presented in the paper relies on A.2 and A.3 in
//    order to be correct; the algorithm had to be adjusted in
//    order to behave properly without relying on those two
//    invariants.
//
//  from FRC(p, c):
//
//    • 0 ≤ p ≤ c
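
// As a reading aid, the invariants that still hold could be spelled
// out as a debug-only check (purely illustrative; no such function
// exists in this package):
//
//	func (c *arCache[K, V]) checkInvariants() {
//		recentLen := c.recentPinned.Len + c.recentLive.Len + c.recentGhost.Len         // |L₁|
//		frequentLen := c.frequentPinned.Len + c.frequentLive.Len + c.frequentGhost.Len // |L₂|
//		switch {
//		case recentLen+frequentLen > 2*c.cap:
//			panic("violated: 0 ≤ |L₁|+|L₂| ≤ 2c")
//		case recentLen > c.cap:
//			panic("violated: 0 ≤ |L₁| ≤ c")
//		case c.recentLiveTarget < 0 || c.recentLiveTarget > c.cap:
//			panic("violated: 0 ≤ p ≤ c")
//		}
//	}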

// OK, at the beginning I said that ARC(c) is most reasonably
// understood in terms of DBL(2c); to that end, we'll have two
// replacement policies that are layered: the "dblReplace" policy
// that is used in the cases when DBL(2c) would have a cache-miss,
// and the "arcReplace" policy that is used when ARC(c) has a
// cache-miss but DBL(2c) wouldn't have one (arcReplace is also
// called from within dblReplace).

// dblReplace is the DBL(2c) replacement policy.
//
// It returns an entry that is not in any list (c.recentPinned,
// c.recentLive, c.frequentPinned, c.frequentLive, or c.unusedLive),
// and is ready to be stored into a list by the caller.
func (c *arCache[K, V]) dblReplace() *LinkedListEntry[arcLiveEntry[K, V]] {
	c.waitForAvail()

	// The DBL(2c) replacement policy is quite simple: "Replace
	// the LRU page in L₁, if L₁ contains exactly c pages;
	// otherwise, replace the LRU page in L₂".
	//
	// This looks a touch more complicated than a simple DBL(2c)
	// implementation would look, but that's just because L₁ and
	// L₂ are not implemented as uniform lists; instead of "the
	// LRU entry of L₁" being a simple `c.recent.Oldest`, we have
	// to check the 3 different segments of L₁.

	recentLen := c.recentPinned.Len + c.recentLive.Len + c.recentGhost.Len // |L₁|
	switch {
	case recentLen == c.cap:
		// Use the LRU entry from L₁.
		//
		// Note that *even when* there are available entries
		// from c.unusedLive, we still do this and evict the
		// LRU entry from L₁ in order to avoid violating the
		// `0 ≤ |L₁| ≤ c` invariant.
		switch {
		case !c.recentGhost.IsEmpty(): // bottom
			return c.arcReplace(c.recentGhost.Oldest, false)
		case !c.recentLive.IsEmpty(): // middle
			entry := c.recentLive.Oldest
			c.recentLive.Delete(entry)
			delete(c.liveByName, entry.Value.key)
			return entry
		default: // case !c.recentPinned.IsEmpty(): // top
			// This can't happen because `recentLen == c.cap &&
			// c.recentGhost.IsEmpty() && c.recentLive.IsEmpty()`
			// implies that `c.recentPinned.Len == c.cap`, which
			// can't be true if c.waitForAvail() returned.
			panic(fmt.Errorf("should not happen: lengths don't match up"))
		}
	case recentLen < c.cap:
		// If the directory is full, use the LRU entry from
		// L₂; otherwise use a free (unused) entry.
		switch {
		// Directory is not full: use a free entry.
		case !c.unusedLive.IsEmpty():
			entry := c.unusedLive.Oldest
			c.unusedLive.Delete(entry)
			return entry
		case !c.unusedGhost.IsEmpty():
			return c.arcReplace(c.unusedGhost.Oldest, false)
		// Directory is full: use the LRU entry from L₂.
		case !c.frequentGhost.IsEmpty():
			return c.arcReplace(c.frequentGhost.Oldest, false)
		default:
			// This can't happen because `recentLen < c.cap` implies
			// that `c.recentGhost.Len < c.cap`, which means that
			// there is at least one ghost entry that is available
			// in c.unusedGhost or c.frequentGhost.
			panic(fmt.Errorf("should not happen: lengths don't match up"))
		}
	default: // case recentLen > c.cap:
		// Can't happen because `0 ≤ |L₁| ≤ c` is one of the
		// invariants from DBL(2c); the above policy will
		// never let it happen.
		panic(fmt.Errorf("should not happen: recentLen:%v > cap:%v", recentLen, c.cap))
	}
}
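
// For contrast, with uniform L₁/L₂ lists and no pinning or free
// lists, the textbook DBL(2c) policy quoted above would be just the
// following (an illustrative sketch; `recent`, `frequent`, and
// `evict` are hypothetical):
//
//	if recent.Len == cap {
//		return evict(recent.Oldest) // replace the LRU page in L₁
//	} else {
//		return evict(frequent.Oldest) // replace the LRU page in L₂
//	}
//
// Everything else in dblReplace is accounting for the 3-way
// segmentation of each list, and for the cache/directory not
// necessarily being full.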

// arcReplace is the ARC(c) replacement policy.
//
// It returns an entry that is not in any list (c.recentPinned,
// c.recentLive, c.frequentPinned, c.frequentLive, or c.unusedLive),
// and is ready to be stored into a list by the caller.
//
// `ghostEntry` is a pointer to the entry object that will be used as
// the record of the eviction, if an eviction is performed; it must
// still be present in its old list when arcReplace is called.  If an
// eviction is not performed, then the `ghostEntry` object is left
// unmodified, still in its old list.
//
// The `arbitrary` argument is arbitrary, see the quote about it
// below.
func (c *arCache[K, V]) arcReplace(ghostEntry *LinkedListEntry[arcGhostEntry[K]], arbitrary bool) *LinkedListEntry[arcLiveEntry[K, V]] {
	c.waitForAvail()

	// If the cache isn't full, no need to do an eviction.  (This
	// check is a necessary enhancement over standard ARC in
	// order to support "delete"; because without "delete",
	// arcReplace wouldn't ever be called when the cache isn't
	// full.)
	if entry := c.unusedLive.Oldest; entry != nil {
		c.unusedLive.Delete(entry)
		return entry
	}

	// We'll be evicting.  Prep ghostEntry to record that fact.
	if ghostEntry.List != &c.unusedGhost {
		delete(c.ghostByName, ghostEntry.Value.key)
	}
	ghostEntry.List.Delete(ghostEntry)

	// Note that from here on out, this policy changes *neither*
	// |L₁| nor |L₂|; shortenings were already done by the above
	// `ghostEntry.List.Delete(ghostEntry)` call, and lengthenings
	// will be done by the caller with the returned `entry`.  All
	// this policy is doing from here on out is changing the
	// split between T/B.

	// We have to make a binary choice about whether to evict
	// c.recentLive→c.recentGhost or
	// c.frequentLive→c.frequentGhost.
	var evictFrom *LinkedList[arcLiveEntry[K, V]]
	var evictTo *LinkedList[arcGhostEntry[K]]

	// Make the decision.
	recentLive := c.recentPinned.Len + c.recentLive.Len
	switch {
	// NB: Also check .IsEmpty() in order to support pinning.
	case recentLive > c.recentLiveTarget, c.frequentLive.IsEmpty():
		evictFrom, evictTo = &c.recentLive, &c.recentGhost
	case recentLive < c.recentLiveTarget, c.recentLive.IsEmpty():
		evictFrom, evictTo = &c.frequentLive, &c.frequentGhost
	case recentLive == c.recentLiveTarget:
		// The original paper says "The last replacement
		// decision is somewhat arbitrary, and can be made
		// differently if desired."
		if arbitrary && c.recentLive.Len > 0 {
			evictFrom, evictTo = &c.recentLive, &c.recentGhost
		} else {
			evictFrom, evictTo = &c.frequentLive, &c.frequentGhost
		}
	}

	// Act on the decision.
	entry := evictFrom.Oldest
	// Evict.
	delete(c.liveByName, entry.Value.key)
	evictFrom.Delete(entry)
	// Record the eviction.
	ghostEntry.Value.key = entry.Value.key
	evictTo.Store(ghostEntry)
	c.ghostByName[ghostEntry.Value.key] = ghostEntry

	return entry
}

// OK, now that we have our replacement policies defined, it's
// pretty obvious how to wire them into the "acquire an entry"
// algorithm.  The only parts here that aren't obvious are:
//
//   - the "adapt" step that adjusts c.recentLiveTarget.  Read the
//     paper for an explanation of why the formulas used do a good
//     job of quickly adapting to various workloads.
//
//   - the `ghostEntry.List == &c.frequentGhost` argument to
//     arcReplace in the "cache-miss, but would have been a
//     cache-hit in DBL(2c)" case.  The short answer is that it's
//     arbitrary (as discussed in comments in arcReplace), but
//     matches what's in the original paper.

// Acquire implements the 'Cache' interface.
func (c *arCache[K, V]) Acquire(ctx context.Context, k K) *V {
	c.mu.Lock()
	defer c.mu.Unlock()

	var entry *LinkedListEntry[arcLiveEntry[K, V]]
	switch {
	case c.liveByName[k] != nil: // cache-hit
		entry = c.liveByName[k]
		if entry.List != &c.frequentPinned {
			// Move to frequentPinned (unless it's already
			// there; in which case, don't bother).
			entry.List.Delete(entry)
			c.frequentPinned.Store(entry)
		}
		entry.Value.refs++
	case c.ghostByName[k] != nil: // cache-miss, but would have been a cache-hit in DBL(2c)
		ghostEntry := c.ghostByName[k]
		// Adapt.
		switch ghostEntry.List {
		case &c.recentGhost:
			// Recency is doing well right now; invest toward recency.
			c.recentLiveTarget = min(c.recentLiveTarget+max(1, c.frequentGhost.Len/c.recentGhost.Len), c.cap)
		case &c.frequentGhost:
			// Frequency is doing well right now; invest toward frequency.
			c.recentLiveTarget = max(c.recentLiveTarget-max(1, c.recentGhost.Len/c.frequentGhost.Len), 0)
		}
		// Replace.
		entry = c.arcReplace(ghostEntry, ghostEntry.List == &c.frequentGhost)
		entry.Value.key = k
		c.src.Load(ctx, k, &entry.Value.val)
		entry.Value.refs = 1
		c.frequentPinned.Store(entry) // the entry has now been used twice "recently", and is pinned
		c.liveByName[k] = entry
	default: // cache-miss, and would have even been a cache-miss in DBL(2c)
		// Replace.
		entry = c.dblReplace()
		entry.Value.key = k
		c.src.Load(ctx, k, &entry.Value.val)
		entry.Value.refs = 1
		c.recentPinned.Store(entry) // the entry has only been used once "recently", and is pinned
		c.liveByName[k] = entry
	}
	return &entry.Value.val
}
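
// To make the "adapt" step concrete with numbers (an illustrative
// trace, not real output): suppose cap=8, recentLiveTarget=4,
// |B₁|=2, and |B₂|=6.  Then:
//
//	hit in B₁: recentLiveTarget ← min(4+max(1, 6/2), 8) = min(4+3, 8) = 7
//	hit in B₂: recentLiveTarget ← max(4-max(1, 2/6), 0) = max(4-1, 0) = 3
//
// Note the integer division: 2/6 = 0, so the max(1, …) floor is what
// keeps the target moving.  The bigger the "other" ghost list is
// relative to the one we just hit in, the harder the target shifts
// toward the list we hit in.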

// Given everything that we've already explained, I think it's fair
// to call the remaining code "boilerplate".

// Delete implements the 'Cache' interface.
func (c *arCache[K, V]) Delete(k K) {
	c.mu.Lock()

	if entry := c.liveByName[k]; entry != nil {
		if entry.Value.refs > 0 {
			// Let .Release(k) do the deletion when the
			// refcount drops to 0.
			c.unlockAndWaitForDel(entry)
			return
		}
		delete(c.liveByName, entry.Value.key)
		entry.List.Delete(entry)
		c.unusedLive.Store(entry)
	} else if entry := c.ghostByName[k]; entry != nil {
		delete(c.ghostByName, k)
		entry.List.Delete(entry)
		c.unusedGhost.Store(entry)
	}

	// No need to call c.notifyAvail(); if we were able to delete
	// it, it was already available.

	c.mu.Unlock()
}

// Release implements the 'Cache' interface.
func (c *arCache[K, V]) Release(k K) {
	c.mu.Lock()
	defer c.mu.Unlock()

	entry := c.liveByName[k]
	if entry == nil || entry.Value.refs <= 0 {
		panic(fmt.Errorf("caching.arCache.Release called on key that is not held: %v", k))
	}

	entry.Value.refs--
	if entry.Value.refs == 0 {
		switch {
		case entry.Value.del != nil:
			delete(c.liveByName, entry.Value.key)
			entry.List.Delete(entry)
			c.unusedLive.Store(entry)
			c.notifyOfDel(entry)
		case entry.List == &c.recentPinned:
			c.recentPinned.Delete(entry)
			c.recentLive.Store(entry)
		case entry.List == &c.frequentPinned:
			c.frequentPinned.Delete(entry)
			c.frequentLive.Store(entry)
		}
		c.notifyAvail()
	}
}

// Flush implements the 'Cache' interface.
func (c *arCache[K, V]) Flush(ctx context.Context) {
	c.mu.Lock()
	defer c.mu.Unlock()

	for _, list := range []*LinkedList[arcLiveEntry[K, V]]{
		&c.recentPinned,
		&c.recentLive,
		&c.frequentPinned,
		&c.frequentLive,
		&c.unusedLive,
	} {
		for entry := list.Oldest; entry != nil; entry = entry.Newer {
			c.src.Flush(ctx, &entry.Value.val)
		}
	}
}

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
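
// A final illustration: a minimal plausible Source implementation
// for use with this cache (hypothetical; the real Source interface
// is defined elsewhere in this package, and the method set assumed
// here is just the `.Load()`/`.Flush()` calls made in this file):
//
//	type mapSource[K comparable, V any] struct{ m map[K]V }
//
//	// Load fills *v with the value for k (the zero V if absent).
//	func (s mapSource[K, V]) Load(_ context.Context, k K, v *V) { *v = s.m[k] }
//
//	// Flush is a no-op here; a real Source might write dirty
//	// values back to disk.
//	func (s mapSource[K, V]) Flush(_ context.Context, _ *V) {}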