From f6f0a251ed962374f69e9fd7722dcd5c44aa58ad Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 24 Mar 2023 21:49:26 -0600
Subject: containers: Rethink the caching libraries

---
 lib/containers/arcache_test.go | 296 +++++++++++++++++++++++++++++++----------
 1 file changed, 229 insertions(+), 67 deletions(-)

(limited to 'lib/containers/arcache_test.go')

diff --git a/lib/containers/arcache_test.go b/lib/containers/arcache_test.go
index 507dd04..3e858ec 100644
--- a/lib/containers/arcache_test.go
+++ b/lib/containers/arcache_test.go
@@ -9,103 +9,262 @@ package containers
 
 import (
 	"bytes"
+	"context"
 	"crypto/rand"
 	"fmt"
-	"math"
 	"math/big"
+	"sort"
 	"testing"
 
 	"github.com/datawire/dlib/derror"
+	"github.com/datawire/dlib/dlog"
 	"github.com/stretchr/testify/require"
 )
 
 // Add runtime validity checks /////////////////////////////////////////////////
 
-func (c *ARCache[K, V]) err(e error) error {
-	return fmt.Errorf("%[1]T(%[1]p): %w (b1:%v t1:%v / t2:%v b2:%v)",
-		c, e,
-		c.recentGhost.Len(),
-		c.recentLive.Len(),
-		c.frequentLive.Len(),
-		c.frequentGhost.Len())
+func (c *arc[K, V]) logf(format string, a ...any) {
+	c.t.Helper()
+	c.t.Logf("%[1]T(%[1]p): %s (b1:%v t1:%v p1:%v / p1:%v, t2:%v b2:%v)",
+		c,
+		fmt.Sprintf(format, a...),
+		c.recentGhost.Len,
+		c.recentLive.Len,
+		c.recentPinned.Len,
+		c.frequentPinned.Len,
+		c.frequentLive.Len,
+		c.frequentGhost.Len)
 }
 
-func (c *ARCache[K, V]) check() {
-	if c.MaxLen < 2 {
-		panic(c.err(fmt.Errorf("MaxLen:%v < 2", c.MaxLen)))
-	}
+func (c *arc[K, V]) fatalf(format string, a ...any) {
+	c.logf(format, a...)
+	c.t.FailNow()
+}
 
-	if fullLen := c.fullLen(); fullLen > 2*c.MaxLen {
-		panic(c.err(fmt.Errorf("fullLen:%v > MaxLen:%v", fullLen, c.MaxLen)))
+func (c *arc[K, V]) check() {
+	if c.noCheck {
+		return
 	}
+	c.t.Helper()
 
-	if liveLen := c.liveLen(); liveLen > c.MaxLen {
-		panic(c.err(fmt.Errorf("liveLen:%v > MaxLen:%v", liveLen, c.MaxLen)))
-	}
-	if ghostLen := c.ghostLen(); ghostLen > c.MaxLen {
-		panic(c.err(fmt.Errorf("ghostLen:%v > MaxLen:%v", ghostLen, c.MaxLen)))
-	}
-	if recentLen := c.recentLen(); recentLen > c.MaxLen {
-		panic(c.err(fmt.Errorf("recentLen:%v > MaxLen:%v", recentLen, c.MaxLen)))
+	// Do the slow parts for 1/32 of all calls.
+	fullCheck := getRand(c.t, 32) == 0
+
+	// Check that the lists are in-sync with the maps.
+	if fullCheck {
+		liveEntries := make(map[*LinkedListEntry[arcLiveEntry[K, V]]]int, len(c.liveByName))
+		for _, list := range c.liveLists {
+			for entry := list.Oldest; entry != nil; entry = entry.Newer {
+				liveEntries[entry]++
+			}
+		}
+		for _, entry := range c.liveByName {
+			liveEntries[entry]--
+			if liveEntries[entry] == 0 {
+				delete(liveEntries, entry)
+			}
+		}
+		require.Len(c.t, liveEntries, 0)
+
+		ghostEntries := make(map[*LinkedListEntry[arcGhostEntry[K]]]int, len(c.ghostByName))
+		for _, list := range c.ghostLists {
+			for entry := list.Oldest; entry != nil; entry = entry.Newer {
+				ghostEntries[entry]++
+			}
+		}
+		for _, entry := range c.ghostByName {
+			ghostEntries[entry]--
+			if ghostEntries[entry] == 0 {
+				delete(ghostEntries, entry)
+			}
+		}
+		require.Len(c.t, ghostEntries, 0)
+	}
+
+	// Check the invariants.
+
+	// from DBL(2c):
+	//
+	//    • 0 ≤ |L₁|+|L₂| ≤ 2c
+	if fullLen := c.recentPinned.Len + c.recentLive.Len + c.recentGhost.Len + c.frequentPinned.Len + c.frequentLive.Len + c.frequentGhost.Len; fullLen < 0 || fullLen > 2*c.cap {
+		c.fatalf("! ( 0 <= fullLen:%v <= 2*cap:%v )", fullLen, c.cap)
+	}
+	//    • 0 ≤ |L₁| ≤ c
+	if recentLen := c.recentPinned.Len + c.recentLive.Len + c.recentGhost.Len; recentLen < 0 || recentLen > c.cap {
+		c.fatalf("! ( 0 <= recentLen:%v <= cap:%v )", recentLen, c.cap)
+	}
+	//    • 0 ≤ |L₂| ≤ 2c
+	if frequentLen := c.frequentPinned.Len + c.frequentLive.Len + c.frequentGhost.Len; frequentLen < 0 || frequentLen > 2*c.cap {
+		c.fatalf("! ( 0 <= frequentLen:%v <= 2*cap:%v )", frequentLen, c.cap)
+	}
+	//
+	// from Π(c):
+	//
+	//    • A.1: The lists T₁, B₁, T₂, and B₂ are all mutually
+	//      disjoint.
+	if fullCheck {
+		keys := make(map[K]int, len(c.liveByName)+len(c.ghostByName))
+		for _, list := range c.liveLists {
+			for entry := list.Oldest; entry != nil; entry = entry.Newer {
+				keys[entry.Value.key]++
+			}
+		}
+		for _, list := range c.ghostLists {
+			for entry := list.Oldest; entry != nil; entry = entry.Newer {
+				keys[entry.Value.key]++
+			}
+		}
+		for key, cnt := range keys {
+			if cnt > 1 {
+				listNames := make([]string, 0, cnt)
+				for listName, list := range c.liveLists {
+					for entry := list.Oldest; entry != nil; entry = entry.Newer {
+						if entry.Value.key == key {
+							listNames = append(listNames, listName)
+						}
+					}
+				}
+				for listName, list := range c.ghostLists {
+					for entry := list.Oldest; entry != nil; entry = entry.Newer {
+						if entry.Value.key == key {
+							listNames = append(listNames, listName)
+						}
+					}
+				}
+				sort.Strings(listNames)
+				c.fatalf("dup key: %v is in %v", key, listNames)
+			}
+		}
 	}
-	if frequentLen := c.frequentLen(); frequentLen > c.MaxLen {
-		panic(c.err(fmt.Errorf("frequentLen:%v > MaxLen:%v", frequentLen, c.MaxLen)))
+	//    • (not true) A.2: If |L₁|+|L₂| < c, then both B₁ and B₂ are
+	//      empty.  But supporting "delete" invalidates this!
+	//    • (not true) A.3: If |L₁|+|L₂| ≥ c, then |T₁|+|T₂| = c.  But
+	//      supporting "delete" invalidates this!
+	//    • A.4(a): Either (T₁ or B₁ is empty), or (the LRU page in T₁
+	//      is more recent than the MRU page in B₁).
+	//    • A.4(b): Either (T₂ or B₂ is empty), or (the LRU page in T₂
+	//      is more recent than the MRU page in B₂).
+	//    • A.5: |T₁∪T₂| is the set of pages that would be maintained
+	//      by the cache policy π(c).
+	//
+	// from FRC(p, c):
+	//
+	//    • 0 ≤ p ≤ c
+	if c.recentLiveTarget < 0 || c.recentLiveTarget > c.cap {
+		c.fatalf("! ( 0 <= p:%v <= cap:%v )", c.recentLiveTarget, c.cap)
 	}
 }
 
 // Compatibility layer for hashicorp/golang-lru ////////////////////////////////
 
+type lenFunc func() int
+
+func (fn lenFunc) Len() int { return fn() }
+
 type arc[K comparable, V any] struct {
-	ARCache[K, V]
-	t1, t2 *lruCache[K, V]
-	b1, b2 *lruCache[K, struct{}]
+	*arCache[K, V]
+	ctx context.Context //nolint:containedctx // have no choice to keep the hashicorp-compatible API
+	t   testing.TB
+
+	t1, t2, b1, b2 lenFunc
+
+	// For speeding up .check()
+	noCheck    bool
+	liveLists  map[string]*LinkedList[arcLiveEntry[K, V]]
+	ghostLists map[string]*LinkedList[arcGhostEntry[K]]
 }
 
-func NewARC[K comparable, V any](size int) (*arc[K, V], error) {
+func NewARC[K comparable, V any](t testing.TB, size int) (*arc[K, V], error) {
+	src := SourceFunc[K, V](func(context.Context, K, *V) {})
+	_, isBench := t.(*testing.B)
 	ret := &arc[K, V]{
-		ARCache: ARCache[K, V]{
-			MaxLen: size,
-		},
-	}
-	ret.t1 = &ret.recentLive
-	ret.t2 = &ret.frequentLive
-	ret.b1 = &ret.recentGhost
-	ret.b2 = &ret.frequentGhost
+		ctx:     dlog.NewTestContext(t, true),
+		t:       t,
+		noCheck: isBench,
+	}
+	ret.init(size, src)
 	return ret, nil
 }
 
-func (c *arc[K, V]) Contains(k K) bool { return c.Has(k) }
-func (c *arc[K, V]) Get(k K) (V, bool) { return c.Load(k) }
-func (c *arc[K, V]) Add(k K, v V)      { c.Store(k, v) }
-func (c *arc[K, V]) Remove(k K)        { c.Delete(k) }
+func (c *arc[K, V]) init(size int, src Source[K, V]) {
+	c.arCache = NewARCache[K, V](size, src).(*arCache[K, V])
+	c.t1 = lenFunc(func() int { return c.arCache.recentLive.Len })
+	c.t2 = lenFunc(func() int { return c.arCache.frequentLive.Len })
+	c.b1 = lenFunc(func() int { return c.arCache.recentGhost.Len })
+	c.b2 = lenFunc(func() int { return c.arCache.frequentGhost.Len })
+
+	c.liveLists = map[string]*LinkedList[arcLiveEntry[K, V]]{
+		"p1": &c.recentPinned,
+		"t1": &c.recentLive,
+		"p2": &c.frequentPinned,
+		"t2": &c.frequentLive,
+	}
+	c.ghostLists = map[string]*LinkedList[arcGhostEntry[K]]{
+		"b1": &c.recentGhost,
+		"b2": &c.frequentGhost,
+	}
+}
+
+// non-mutators
+
 func (c *arc[K, V]) p() int            { return c.recentLiveTarget }
-func (c *arc[K, V]) Purge() {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	c.recentLive = lruCache[K, V]{}
-	c.recentGhost = lruCache[K, struct{}]{}
-	c.frequentLive = lruCache[K, V]{}
-	c.frequentGhost = lruCache[K, struct{}]{}
-	c.recentLiveTarget = 0
+func (c *arc[K, V]) Len() int          { return len(c.liveByName) }
+func (c *arc[K, V]) Contains(k K) bool { return c.liveByName[k] != nil }
+
+func (c *arc[K, V]) Peek(k K) (V, bool) {
+	entry := c.liveByName[k]
+	if entry == nil {
+		var zero V
+		return zero, false
+	}
+	return entry.Value.val, true
 }
 
 func (c *arc[K, V]) Keys() []K {
-	c.mu.RLock()
-	defer c.mu.RUnlock()
-	ret := make([]K, 0, c.Len())
-	for entry := c.recentLive.byAge.oldest; entry != nil; entry = entry.newer {
+	ret := make([]K, 0, len(c.liveByName))
+	for entry := c.recentLive.Oldest; entry != nil; entry = entry.Newer {
 		ret = append(ret, entry.Value.key)
 	}
-	for entry := c.frequentLive.byAge.oldest; entry != nil; entry = entry.newer {
+	for entry := c.frequentLive.Oldest; entry != nil; entry = entry.Newer {
 		ret = append(ret, entry.Value.key)
 	}
 	return ret
 }
 
+// mutators
+
+func (c *arc[K, V]) Remove(k K) {
+	defer c.check()
+	c.Delete(k)
+}
+
+func (c *arc[K, V]) Purge() {
+	defer c.check()
+	c.init(c.cap, c.src)
+}
+
+func (c *arc[K, V]) Get(k K) (V, bool) {
+	defer c.check()
+	if !c.Contains(k) {
+		var zero V
+		return zero, false
+	}
+	val := *c.Acquire(c.ctx, k)
+	c.Release(k)
+	return val, true
+}
+
+func (c *arc[K, V]) Add(k K, v V) {
+	defer c.check()
+	ptr := c.Acquire(c.ctx, k)
+	*ptr = v
+	c.Release(k)
+}
+
 // Tests from hashicorp/golang-lru /////////////////////////////////////////////
 
-func getRand(tb testing.TB) int64 {
-	out, err := rand.Int(rand.Reader, big.NewInt(math.MaxInt64))
+func getRand(tb testing.TB, limit int64) int64 {
+	out, err := rand.Int(rand.Reader, big.NewInt(limit))
 	if err != nil {
 		tb.Fatal(err)
 	}
@@ -113,14 +272,14 @@ func getRand(tb testing.TB) int64 {
 }
 
 func BenchmarkARC_Rand(b *testing.B) {
-	l, err := NewARC[int64, int64](8192)
+	l, err := NewARC[int64, int64](b, 8192)
 	if err != nil {
 		b.Fatalf("err: %v", err)
 	}
 
 	trace := make([]int64, b.N*2)
 	for i := 0; i < b.N*2; i++ {
-		trace[i] = getRand(b) % 32768
+		trace[i] = getRand(b, 32768)
 	}
 
 	b.ResetTimer()
@@ -141,7 +300,7 @@ func BenchmarkARC_Rand(b *testing.B) {
 }
 
 func BenchmarkARC_Freq(b *testing.B) {
-	l, err := NewARC[int64, int64](8192)
+	l, err := NewARC[int64, int64](b, 8192)
 	if err != nil {
 		b.Fatalf("err: %v", err)
 	}
@@ -149,9 +308,9 @@ func BenchmarkARC_Freq(b *testing.B) {
 	trace := make([]int64, b.N*2)
 	for i := 0; i < b.N*2; i++ {
 		if i%2 == 0 {
-			trace[i] = getRand(b) % 16384
+			trace[i] = getRand(b, 16384)
 		} else {
-			trace[i] = getRand(b) % 32768
+			trace[i] = getRand(b, 32768)
 		}
 	}
 
@@ -234,7 +393,7 @@ func FuzzARC(f *testing.F) {
 
 func testARC_RandomOps(t *testing.T, ops []arcOp) {
 	size := 128
-	l, err := NewARC[int64, int64](128)
+	l, err := NewARC[int64, int64](t, 128)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -264,7 +423,7 @@ func testARC_RandomOps(t *testing.T, ops []arcOp) {
 
 func TestARC_Get_RecentToFrequent(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](128)
+	l, err := NewARC[int, int](t, 128)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -309,7 +468,7 @@ func TestARC_Get_RecentToFrequent(t *testing.T) {
 
 func TestARC_Add_RecentToFrequent(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](128)
+	l, err := NewARC[int, int](t, 128)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -344,7 +503,7 @@ func TestARC_Add_RecentToFrequent(t *testing.T) {
 
 func TestARC_Adaptive(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](4)
+	l, err := NewARC[int, int](t, 4)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -353,13 +512,16 @@ func TestARC_Adaptive(t *testing.T) {
 	for i := 0; i < 4; i++ {
 		l.Add(i, i)
 	}
+	require.Equal(t, `[   _0_    _1_    _2_    _3_ !^]___    ___    ___    ___    `, l.String())
 	if n := l.t1.Len(); n != 4 {
 		t.Fatalf("bad: %d", n)
 	}
 
 	// Move to t2
 	l.Get(0)
+	require.Equal(t, `    ___[   _1_    _2_    _3_ !^ _0_   ]___    ___    ___    `, l.String())
 	l.Get(1)
+	require.Equal(t, `    ___    ___[   _2_    _3_ !^ _1_    _0_   ]___    ___    `, l.String())
 	if n := l.t2.Len(); n != 2 {
 		t.Fatalf("bad: %d", n)
 	}
@@ -463,7 +625,7 @@ func TestARC_Adaptive(t *testing.T) {
 
 func TestARC(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](128)
+	l, err := NewARC[int, int](t, 128)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -509,7 +671,7 @@ func TestARC(t *testing.T) {
 // Test that Contains doesn't update recent-ness
 func TestARC_Contains(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](2)
+	l, err := NewARC[int, int](t, 2)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
@@ -529,7 +691,7 @@ func TestARC_Contains(t *testing.T) {
 // Test that Peek doesn't update recent-ness
 func TestARC_Peek(t *testing.T) {
 	t.Parallel()
-	l, err := NewARC[int, int](2)
+	l, err := NewARC[int, int](t, 2)
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-- 
cgit v1.2.3-2-g168b