Merge pull request #1811 from ingvagabund/kube-client-sandbox

refactor(pkg/operator): replace informerResource with a kubeClientSandbox
refactor(kubeClientSandbox): keep a cache of evicted pods and allow to reset it at the end of each descheduling cycle
2026-01-25 20:59:28 +01:00 · 2026-01-22 19:59:28 +05:30 · 2026-01-22 14:49:47 +01:00 · 2026-01-22 14:44:41 +01:00 · 2026-01-22 14:44:32 +01:00 · 2026-01-22 14:44:14 +01:00
2 changed files with 607 additions and 95 deletions
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -22,6 +22,7 @@ import (
 	"math"
 	"net/http"
 	"strconv"
+	"sync"
 	"time"

 	promapi "github.com/prometheus/client_golang/api"
@@ -84,16 +85,61 @@ type profileRunner struct {
 	descheduleEPs, balanceEPs eprunner
 }

+// evictedPodInfo stores identifying information about a pod that was evicted during dry-run mode
+type evictedPodInfo struct {
+	Namespace string
+	Name      string
+	UID       string
+}
+
+// evictedPodsCache is a thread-safe cache for tracking pods evicted during dry-run mode
+type evictedPodsCache struct {
+	sync.RWMutex
+	pods map[string]*evictedPodInfo
+}
+
+func newEvictedPodsCache() *evictedPodsCache {
+	return &evictedPodsCache{
+		pods: make(map[string]*evictedPodInfo),
+	}
+}
+
+func (c *evictedPodsCache) add(pod *v1.Pod) {
+	c.Lock()
+	defer c.Unlock()
+	c.pods[string(pod.UID)] = &evictedPodInfo{
+		Namespace: pod.Namespace,
+		Name:      pod.Name,
+		UID:       string(pod.UID),
+	}
+}
+
+func (c *evictedPodsCache) list() []*evictedPodInfo {
+	c.RLock()
+	defer c.RUnlock()
+	pods := make([]*evictedPodInfo, 0, len(c.pods))
+	for _, pod := range c.pods {
+		podCopy := *pod
+		pods = append(pods, &podCopy)
+	}
+	return pods
+}
+
+func (c *evictedPodsCache) clear() {
+	c.Lock()
+	defer c.Unlock()
+	c.pods = make(map[string]*evictedPodInfo)
+}
+
 type descheduler struct {
 	rs                                *options.DeschedulerServer
-	ir                                *informerResources
+	kubeClientSandbox                 *kubeClientSandbox
 	getPodsAssignedToNode             podutil.GetPodsAssignedToNodeFunc
 	sharedInformerFactory             informers.SharedInformerFactory
 	namespacedSecretsLister           corev1listers.SecretNamespaceLister
 	deschedulerPolicy                 *api.DeschedulerPolicy
 	eventRecorder                     events.EventRecorder
 	podEvictor                        *evictions.PodEvictor
-	podEvictionReactionFnc            func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error)
 	metricsCollector                  *metricscollector.MetricsCollector
 	prometheusClient                  promapi.Client
 	previousPrometheusClientTransport *http.Transport
@@ -102,34 +148,46 @@ type descheduler struct {
 	metricsProviders                  map[api.MetricsSource]*api.MetricsProvider
 }

-type informerResources struct {
-	sharedInformerFactory informers.SharedInformerFactory
-	resourceToInformer    map[schema.GroupVersionResource]informers.GenericInformer
+// kubeClientSandbox creates a sandbox environment with a fake client and informer factory
+// that mirrors resources from a real client, useful for dry-run testing scenarios
+type kubeClientSandbox struct {
+	client                 clientset.Interface
+	sharedInformerFactory  informers.SharedInformerFactory
+	fakeKubeClient         *fakeclientset.Clientset
+	fakeFactory            informers.SharedInformerFactory
+	resourceToInformer     map[schema.GroupVersionResource]informers.GenericInformer
+	evictedPodsCache       *evictedPodsCache
+	podEvictionReactionFnc func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error)
 }

-func newInformerResources(sharedInformerFactory informers.SharedInformerFactory) *informerResources {
-	return &informerResources{
-		sharedInformerFactory: sharedInformerFactory,
-		resourceToInformer:    make(map[schema.GroupVersionResource]informers.GenericInformer),
+func newKubeClientSandbox(client clientset.Interface, sharedInformerFactory informers.SharedInformerFactory, resources ...schema.GroupVersionResource) (*kubeClientSandbox, error) {
+	sandbox := &kubeClientSandbox{
+		client:                 client,
+		sharedInformerFactory:  sharedInformerFactory,
+		resourceToInformer:     make(map[schema.GroupVersionResource]informers.GenericInformer),
+		evictedPodsCache:       newEvictedPodsCache(),
+		podEvictionReactionFnc: podEvictionReactionFnc,
 	}
-}

-func (ir *informerResources) Uses(resources ...schema.GroupVersionResource) error {
 	for _, resource := range resources {
-		informer, err := ir.sharedInformerFactory.ForResource(resource)
+		informer, err := sharedInformerFactory.ForResource(resource)
 		if err != nil {
-			return err
+			return nil, err
 		}
-
-		ir.resourceToInformer[resource] = informer
+		sandbox.resourceToInformer[resource] = informer
 	}
-	return nil
+
+	return sandbox, nil
 }

-// CopyTo Copy informer subscriptions to the new factory and objects to the fake client so that the backing caches are populated for when listers are used.
-func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFactory informers.SharedInformerFactory) error {
-	for resource, informer := range ir.resourceToInformer {
-		_, err := newFactory.ForResource(resource)
+func (sandbox *kubeClientSandbox) buildSandbox() error {
+	sandbox.fakeKubeClient = fakeclientset.NewSimpleClientset()
+	// simulate a pod eviction by deleting a pod
+	sandbox.fakeKubeClient.PrependReactor("create", "pods", sandbox.podEvictionReactionFnc(sandbox.fakeKubeClient, sandbox.evictedPodsCache))
+	sandbox.fakeFactory = informers.NewSharedInformerFactory(sandbox.fakeKubeClient, 0)
+
+	for resource, informer := range sandbox.resourceToInformer {
+		_, err := sandbox.fakeFactory.ForResource(resource)
 		if err != nil {
 			return fmt.Errorf("error getting resource %s: %w", resource, err)
 		}
@@ -140,12 +198,67 @@ func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFact
 		}

 		for _, object := range objects {
-			fakeClient.Tracker().Add(object)
+			if err := sandbox.fakeKubeClient.Tracker().Add(object); err != nil {
+				return fmt.Errorf("error adding object to tracker: %w", err)
+			}
 		}
 	}
+
 	return nil
 }

+func (sandbox *kubeClientSandbox) fakeClient() *fakeclientset.Clientset {
+	return sandbox.fakeKubeClient
+}
+
+func (sandbox *kubeClientSandbox) fakeSharedInformerFactory() informers.SharedInformerFactory {
+	return sandbox.fakeFactory
+}
+
+func (sandbox *kubeClientSandbox) reset() {
+	sandbox.evictedPodsCache.clear()
+}
+
+func nodeSelectorFromPolicy(deschedulerPolicy *api.DeschedulerPolicy) (labels.Selector, error) {
+	nodeSelector := labels.Everything()
+	if deschedulerPolicy.NodeSelector != nil {
+		sel, err := labels.Parse(*deschedulerPolicy.NodeSelector)
+		if err != nil {
+			return nil, err
+		}
+		nodeSelector = sel
+	}
+	return nodeSelector, nil
+}
+
+func addNodeSelectorIndexer(sharedInformerFactory informers.SharedInformerFactory, nodeSelector labels.Selector) error {
+	return nodeutil.AddNodeSelectorIndexer(sharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector)
+}
+
+func setupInformerIndexers(sharedInformerFactory informers.SharedInformerFactory, deschedulerPolicy *api.DeschedulerPolicy) (podutil.GetPodsAssignedToNodeFunc, error) {
+	// create a new instance of the shared informer factory from the cached client
+	// register the pod informer, otherwise it will not get running
+	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(sharedInformerFactory.Core().V1().Pods().Informer())
+	if err != nil {
+		return nil, fmt.Errorf("build get pods assigned to node function error: %v", err)
+	}
+
+	// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
+	// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
+	//                    Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
+
+	nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
+		return nil, err
+	}
+
+	return getPodsAssignedToNode, nil
+}
+
 func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.MetricsSource]*api.MetricsProvider {
 	providersMap := make(map[api.MetricsSource]*api.MetricsProvider)
 	for _, provider := range providersList {
@@ -157,16 +270,19 @@ func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.Metri
 func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory, namespacedSharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
 	podInformer := sharedInformerFactory.Core().V1().Pods().Informer()

-	ir := newInformerResources(sharedInformerFactory)
-	ir.Uses(v1.SchemeGroupVersion.WithResource("pods"),
+	// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
+	// consistent with the real runs without having to keep the list here in sync.
+	kubeClientSandbox, err := newKubeClientSandbox(rs.Client, sharedInformerFactory,
+		v1.SchemeGroupVersion.WithResource("pods"),
 		v1.SchemeGroupVersion.WithResource("nodes"),
-		// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
-		// consistent with the real runs without having to keep the list here in sync.
-		v1.SchemeGroupVersion.WithResource("namespaces"),                 // Used by the defaultevictor plugin
-		schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"),  // Used by the defaultevictor plugin
-		policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin
-		v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"),     // Used by the defaultevictor plugin
-	) // Used by the defaultevictor plugin
+		v1.SchemeGroupVersion.WithResource("namespaces"),
+		schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"),
+		policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"),
+		v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create kube client sandbox: %v", err)
+	}

 	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
 	if err != nil {
@@ -194,29 +310,24 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu
 	}

 	desch := &descheduler{
-		rs:                     rs,
-		ir:                     ir,
-		getPodsAssignedToNode:  getPodsAssignedToNode,
-		sharedInformerFactory:  sharedInformerFactory,
-		deschedulerPolicy:      deschedulerPolicy,
-		eventRecorder:          eventRecorder,
-		podEvictor:             podEvictor,
-		podEvictionReactionFnc: podEvictionReactionFnc,
-		prometheusClient:       rs.PrometheusClient,
-		queue:                  workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
-		metricsProviders:       metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
+		rs:                    rs,
+		kubeClientSandbox:     kubeClientSandbox,
+		getPodsAssignedToNode: getPodsAssignedToNode,
+		sharedInformerFactory: sharedInformerFactory,
+		deschedulerPolicy:     deschedulerPolicy,
+		eventRecorder:         eventRecorder,
+		podEvictor:            podEvictor,
+		prometheusClient:      rs.PrometheusClient,
+		queue:                 workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
+		metricsProviders:      metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
 	}

-	nodeSelector := labels.Everything()
-	if deschedulerPolicy.NodeSelector != nil {
-		sel, err := labels.Parse(*deschedulerPolicy.NodeSelector)
-		if err != nil {
-			return nil, err
-		}
-		nodeSelector = sel
+	nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
+	if err != nil {
+		return nil, err
 	}

-	if err := nodeutil.AddNodeSelectorIndexer(sharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
+	if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
 		return nil, err
 	}

@@ -367,46 +478,24 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
 	if d.rs.DryRun {
 		klog.V(3).Infof("Building a cached client from the cluster for the dry run")
 		// Create a new cache so we start from scratch without any leftovers
-		fakeClient := fakeclientset.NewSimpleClientset()
-		// simulate a pod eviction by deleting a pod
-		fakeClient.PrependReactor("create", "pods", d.podEvictionReactionFnc(fakeClient))
-		fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
-
-		err := d.ir.CopyTo(fakeClient, fakeSharedInformerFactory)
+		err := d.kubeClientSandbox.buildSandbox()
 		if err != nil {
 			return err
 		}

-		// create a new instance of the shared informer factor from the cached client
-		// register the pod informer, otherwise it will not get running
-		d.getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods().Informer())
+		getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
 		if err != nil {
-			return fmt.Errorf("build get pods assigned to node function error: %v", err)
-		}
-
-		nodeSelector := labels.Everything()
-		if d.deschedulerPolicy.NodeSelector != nil {
-			sel, err := labels.Parse(*d.deschedulerPolicy.NodeSelector)
-			if err != nil {
-				return err
-			}
-			nodeSelector = sel
-		}
-		// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
-		// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
-		//                    Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
-
-		if err := nodeutil.AddNodeSelectorIndexer(fakeSharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
 			return err
 		}
+		d.getPodsAssignedToNode = getPodsAssignedToNode

 		fakeCtx, cncl := context.WithCancel(context.TODO())
 		defer cncl()
-		fakeSharedInformerFactory.Start(fakeCtx.Done())
-		fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
+		d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
+		d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())

-		client = fakeClient
-		d.sharedInformerFactory = fakeSharedInformerFactory
+		client = d.kubeClientSandbox.fakeClient()
+		d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
 	} else {
 		client = d.rs.Client
 	}
@@ -417,6 +506,10 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {

 	d.runProfiles(ctx, client)

+	if d.rs.DryRun {
+		d.kubeClientSandbox.reset()
+	}
+
 	klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())

 	return nil
@@ -591,7 +684,7 @@ func validateVersionCompatibility(discovery discovery.DiscoveryInterface, desche
 	return nil
 }

-func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
+func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset, evictedCache *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
 	return func(action core.Action) (bool, runtime.Object, error) {
 		if action.GetSubresource() == "eviction" {
 			createAct, matched := action.(core.CreateActionImpl)
@@ -602,6 +695,16 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
 			if !matched {
 				return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
 			}
+			podObj, err := fakeClient.Tracker().Get(action.GetResource(), eviction.GetNamespace(), eviction.GetName())
+			if err == nil {
+				if pod, ok := podObj.(*v1.Pod); ok {
+					evictedCache.add(pod)
+				} else {
+					return false, nil, fmt.Errorf("unable to convert object to *v1.Pod for %v/%v", eviction.GetNamespace(), eviction.GetName())
+				}
+			} else if !apierrors.IsNotFound(err) {
+				return false, nil, fmt.Errorf("unable to get pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
+			}
 			if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
 				return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
 			}
--- a/pkg/descheduler/descheduler_test.go
+++ b/pkg/descheduler/descheduler_test.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"math/rand"
 	"net/http"
+	"strings"
 	"testing"
 	"time"

@@ -18,6 +19,7 @@ import (
 	apiversion "k8s.io/apimachinery/pkg/version"
 	fakediscovery "k8s.io/client-go/discovery/fake"
 	"k8s.io/client-go/informers"
+	clientset "k8s.io/client-go/kubernetes"
 	fakeclientset "k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
 	"k8s.io/component-base/featuregate"
@@ -531,14 +533,10 @@ func TestPodEvictorReset(t *testing.T) {
 			client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))

 			var fakeEvictedPods []string
-			descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
-				return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, nil)
-			}
-
 			for i, cycle := range tc.cycles {
-				if err := descheduler.runDeschedulerLoop(ctx); err != nil {
-					t.Fatalf("Cycle %d: Unable to run a descheduling loop: %v", i+1, err)
-				}
+				evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, descheduler, tc.dryRun)
+				fakeEvictedPods = append(fakeEvictedPods, evictedPodNames...)
+
 				if descheduler.podEvictor.TotalEvicted() != cycle.expectedTotalEvicted || len(evictedPods) != cycle.expectedRealEvictions || len(fakeEvictedPods) != cycle.expectedFakeEvictions {
 					t.Fatalf("Cycle %d: Expected (%v,%v,%v) pods evicted, got (%v,%v,%v) instead", i+1, cycle.expectedTotalEvicted, cycle.expectedRealEvictions, cycle.expectedFakeEvictions, descheduler.podEvictor.TotalEvicted(), len(evictedPods), len(fakeEvictedPods))
 				}
@@ -547,6 +545,49 @@ func TestPodEvictorReset(t *testing.T) {
 	}
 }

+// runDeschedulerLoopAndGetEvictedPods is a temporary duplication from runDeschedulerLoop
+// that will be removed after kubeClientSandbox gets migrated to event handlers.
+func runDeschedulerLoopAndGetEvictedPods(ctx context.Context, t *testing.T, d *descheduler, dryRun bool) []string {
+	var clientSet clientset.Interface
+	if dryRun {
+		if err := d.kubeClientSandbox.buildSandbox(); err != nil {
+			t.Fatalf("Failed to build sandbox: %v", err)
+		}
+
+		getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
+		if err != nil {
+			t.Fatalf("Failed to setup indexers: %v", err)
+		}
+		d.getPodsAssignedToNode = getPodsAssignedToNode
+
+		fakeCtx, cncl := context.WithCancel(context.TODO())
+		defer cncl()
+		d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
+		d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())
+
+		clientSet = d.kubeClientSandbox.fakeClient()
+		d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
+	} else {
+		clientSet = d.rs.Client
+	}
+
+	d.podEvictor.SetClient(clientSet)
+	d.podEvictor.ResetCounters()
+
+	d.runProfiles(ctx, clientSet)
+
+	var evictedPodNames []string
+	if dryRun {
+		evictedPodsFromCache := d.kubeClientSandbox.evictedPodsCache.list()
+		for _, pod := range evictedPodsFromCache {
+			evictedPodNames = append(evictedPodNames, pod.Name)
+		}
+		d.kubeClientSandbox.reset()
+	}
+
+	return evictedPodNames
+}
+
 func checkTotals(t *testing.T, ctx context.Context, descheduler *descheduler, totalEvictionRequests, totalEvicted uint) {
 	if total := descheduler.podEvictor.TotalEvictionRequests(); total != totalEvictionRequests {
 		t.Fatalf("Expected %v total eviction requests, got %v instead", totalEvictionRequests, total)
@@ -602,7 +643,7 @@ func TestEvictionRequestsCache(t *testing.T) {
 	defer cancel()

 	var fakeEvictedPods []string
-	descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
+	descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
 		return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
 	}

@@ -743,7 +784,7 @@ func TestDeschedulingLimits(t *testing.T) {
 			defer cancel()

 			var fakeEvictedPods []string
-			descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
+			descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
 				return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
 			}

@@ -955,15 +996,11 @@ func TestNodeLabelSelectorBasedEviction(t *testing.T) {
 			var evictedPods []string
 			if !tc.dryRun {
 				client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))
-			} else {
-				deschedulerInstance.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
-					return podEvictionReactionTestingFnc(&evictedPods, nil, nil)
-				}
 			}

-			// Run descheduler
-			if err := deschedulerInstance.runDeschedulerLoop(ctx); err != nil {
-				t.Fatalf("Unable to run descheduler loop: %v", err)
+			evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, deschedulerInstance, tc.dryRun)
+			if tc.dryRun {
+				evictedPods = evictedPodNames
 			}

 			// Collect which nodes had pods evicted from them
@@ -1082,3 +1119,375 @@ func TestLoadAwareDescheduling(t *testing.T) {
 	}
 	t.Logf("Total evictions: %v", totalEs)
 }
+
+func TestKubeClientSandboxReset(t *testing.T) {
+	ctx := context.Background()
+	node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
+	p1 := test.BuildTestPod("p1", 100, 0, node1.Name, test.SetRSOwnerRef)
+	p2 := test.BuildTestPod("p2", 100, 0, node1.Name, test.SetRSOwnerRef)
+
+	client := fakeclientset.NewSimpleClientset(node1, p1, p2)
+	sharedInformerFactory := informers.NewSharedInformerFactoryWithOptions(client, 0)
+
+	// Explicitly get the informers to ensure they're registered
+	_ = sharedInformerFactory.Core().V1().Pods().Informer()
+	_ = sharedInformerFactory.Core().V1().Nodes().Informer()
+
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	sandbox, err := newKubeClientSandbox(client, sharedInformerFactory,
+		v1.SchemeGroupVersion.WithResource("pods"),
+		v1.SchemeGroupVersion.WithResource("nodes"),
+	)
+	if err != nil {
+		t.Fatalf("Failed to create kubeClientSandbox: %v", err)
+	}
+
+	if err := sandbox.buildSandbox(); err != nil {
+		t.Fatalf("Failed to build sandbox: %v", err)
+	}
+
+	eviction1 := &policy.Eviction{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      p1.Name,
+			Namespace: p1.Namespace,
+		},
+	}
+	eviction2 := &policy.Eviction{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      p2.Name,
+			Namespace: p2.Namespace,
+		},
+	}
+
+	if err := sandbox.fakeClient().CoreV1().Pods(p1.Namespace).EvictV1(context.TODO(), eviction1); err != nil {
+		t.Fatalf("Error evicting p1: %v", err)
+	}
+	if err := sandbox.fakeClient().CoreV1().Pods(p2.Namespace).EvictV1(context.TODO(), eviction2); err != nil {
+		t.Fatalf("Error evicting p2: %v", err)
+	}
+
+	evictedPods := sandbox.evictedPodsCache.list()
+	if len(evictedPods) != 2 {
+		t.Fatalf("Expected 2 evicted pods in cache, but got %d", len(evictedPods))
+	}
+	t.Logf("Evicted pods in cache before reset: %d", len(evictedPods))
+
+	for _, evictedPod := range evictedPods {
+		if evictedPod.Namespace == "" || evictedPod.Name == "" || evictedPod.UID == "" {
+			t.Errorf("Evicted pod has empty fields: namespace=%s, name=%s, uid=%s", evictedPod.Namespace, evictedPod.Name, evictedPod.UID)
+		}
+		t.Logf("Evicted pod: %s/%s (UID: %s)", evictedPod.Namespace, evictedPod.Name, evictedPod.UID)
+	}
+
+	sandbox.reset()
+
+	evictedPodsAfterReset := sandbox.evictedPodsCache.list()
+	if len(evictedPodsAfterReset) != 0 {
+		t.Fatalf("Expected cache to be empty after reset, but found %d pods", len(evictedPodsAfterReset))
+	}
+	t.Logf("Successfully verified cache is empty after reset")
+}
+
+func TestEvictedPodsCache(t *testing.T) {
+	t.Run("add single pod", func(t *testing.T) {
+		const (
+			podName      = "pod1"
+			podNamespace = "default"
+			podUID       = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+		)
+		cache := newEvictedPodsCache()
+		pod := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      podName,
+				Namespace: podNamespace,
+				UID:       podUID,
+			},
+		}
+
+		cache.add(pod)
+
+		pods := cache.list()
+		if len(pods) != 1 {
+			t.Fatalf("Expected 1 pod in cache, got %d", len(pods))
+		}
+		if pods[0].Name != podName || pods[0].Namespace != podNamespace || pods[0].UID != podUID {
+			t.Errorf("Pod data mismatch: got name=%s, namespace=%s, uid=%s", pods[0].Name, pods[0].Namespace, pods[0].UID)
+		}
+	})
+
+	t.Run("add multiple pods", func(t *testing.T) {
+		cache := newEvictedPodsCache()
+		pods := []*v1.Pod{
+			{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "11111111-1111-1111-1111-111111111111"}},
+			{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "22222222-2222-2222-2222-222222222222"}},
+			{ObjectMeta: metav1.ObjectMeta{Name: "pod3", Namespace: "default", UID: "33333333-3333-3333-3333-333333333333"}},
+		}
+
+		for _, pod := range pods {
+			cache.add(pod)
+		}
+
+		cachedPods := cache.list()
+		if len(cachedPods) != 3 {
+			t.Fatalf("Expected 3 pods in cache, got %d", len(cachedPods))
+		}
+
+		podMap := make(map[string]*evictedPodInfo)
+		for _, cachedPod := range cachedPods {
+			podMap[cachedPod.UID] = cachedPod
+		}
+
+		for _, pod := range pods {
+			cached, ok := podMap[string(pod.UID)]
+			if !ok {
+				t.Errorf("Pod with UID %s not found in cache", pod.UID)
+				continue
+			}
+			if cached.Name != pod.Name || cached.Namespace != pod.Namespace {
+				t.Errorf("Pod data mismatch for UID %s: got name=%s, namespace=%s", pod.UID, cached.Name, cached.Namespace)
+			}
+		}
+	})
+
+	t.Run("add duplicate pod updates entry", func(t *testing.T) {
+		const (
+			duplicateUID   = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
+			updatedPodName = "pod1-new"
+			updatedPodNS   = "kube-system"
+		)
+		cache := newEvictedPodsCache()
+		pod1 := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "pod1",
+				Namespace: "default",
+				UID:       duplicateUID,
+			},
+		}
+		pod2 := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      updatedPodName,
+				Namespace: updatedPodNS,
+				UID:       duplicateUID,
+			},
+		}
+
+		cache.add(pod1)
+		cache.add(pod2)
+
+		pods := cache.list()
+		if len(pods) != 1 {
+			t.Fatalf("Expected 1 pod in cache (duplicates should overwrite), got %d", len(pods))
+		}
+		if pods[0].Name != updatedPodName || pods[0].Namespace != updatedPodNS {
+			t.Errorf("Expected pod2 data, got name=%s, namespace=%s", pods[0].Name, pods[0].Namespace)
+		}
+	})
+
+	t.Run("list returns empty array for empty cache", func(t *testing.T) {
+		cache := newEvictedPodsCache()
+		pods := cache.list()
+		if pods == nil {
+			t.Fatal("Expected non-nil slice from list()")
+		}
+		if len(pods) != 0 {
+			t.Fatalf("Expected empty list, got %d pods", len(pods))
+		}
+	})
+
+	t.Run("list returns copies not references", func(t *testing.T) {
+		const originalPodName = "pod1"
+		cache := newEvictedPodsCache()
+		pod := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      originalPodName,
+				Namespace: "default",
+				UID:       "12345678-1234-1234-1234-123456789abc",
+			},
+		}
+		cache.add(pod)
+
+		pods1 := cache.list()
+		pods2 := cache.list()
+
+		if len(pods1) != 1 || len(pods2) != 1 {
+			t.Fatalf("Expected 1 pod in both lists")
+		}
+
+		pods1[0].Name = "modified"
+
+		if pods2[0].Name == "modified" {
+			t.Error("Modifying list result should not affect other list results (should be copies)")
+		}
+
+		pods3 := cache.list()
+		if pods3[0].Name != originalPodName {
+			t.Error("Cache data was modified, list() should return copies")
+		}
+	})
+
+	t.Run("clear empties the cache", func(t *testing.T) {
+		cache := newEvictedPodsCache()
+		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "aaaa0000-0000-0000-0000-000000000001"}})
+		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "bbbb0000-0000-0000-0000-000000000002"}})
+
+		if len(cache.list()) != 2 {
+			t.Fatal("Expected 2 pods before clear")
+		}
+
+		cache.clear()
+
+		pods := cache.list()
+		if len(pods) != 0 {
+			t.Fatalf("Expected empty cache after clear, got %d pods", len(pods))
+		}
+	})
+
+	t.Run("clear on empty cache is safe", func(t *testing.T) {
+		cache := newEvictedPodsCache()
+		cache.clear()
+
+		pods := cache.list()
+		if len(pods) != 0 {
+			t.Fatalf("Expected empty cache, got %d pods", len(pods))
+		}
+	})
+
+	t.Run("add after clear works correctly", func(t *testing.T) {
+		cache := newEvictedPodsCache()
+		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "00000001-0001-0001-0001-000000000001"}})
+		cache.clear()
+		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "00000002-0002-0002-0002-000000000002"}})
+
+		pods := cache.list()
+		if len(pods) != 1 {
+			t.Fatalf("Expected 1 pod after clear and add, got %d", len(pods))
+		}
+		if pods[0].Name != "pod2" {
+			t.Errorf("Expected pod2, got %s", pods[0].Name)
+		}
+	})
+}
+
+func TestPodEvictionReactionFncErrorHandling(t *testing.T) {
+	podsGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}
+
+	testCases := []struct {
+		name             string
+		setupFnc         func(*fakeclientset.Clientset) (name, namespace string)
+		expectHandled    bool
+		expectError      bool
+		errorContains    string
+		expectedCacheLen int
+	}{
+		{
+			name: "handles pod eviction successfully and adds to cache",
+			setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
+				pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
+				err := fakeClient.Tracker().Add(pod)
+				if err != nil {
+					t.Fatalf("Failed to add pod: %v", err)
+				}
+				return pod.Name, pod.Namespace
+			},
+			expectHandled:    true,
+			expectError:      false,
+			expectedCacheLen: 1,
+		},
+		{
+			name: "returns false and error when delete fails allowing other reactors to handle",
+			setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
+				pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
+				if err := fakeClient.Tracker().Add(pod); err != nil {
+					t.Fatalf("Failed to add pod: %v", err)
+				}
+				if err := fakeClient.Tracker().Delete(podsGVR, pod.Namespace, pod.Name); err != nil {
+					t.Fatalf("Failed to pre-delete pod: %v", err)
+				}
+				return pod.Name, pod.Namespace
+			},
+			expectHandled:    false,
+			expectError:      true,
+			errorContains:    "unable to delete pod",
+			expectedCacheLen: 0,
+		},
+		{
+			name: "returns error when pod doesn't exist in tracker from the start",
+			setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
+				// Don't add the pod to the tracker at all
+				return "nonexistent-pod", "default"
+			},
+			expectHandled:    false,
+			expectError:      true,
+			errorContains:    "unable to delete pod",
+			expectedCacheLen: 0,
+		},
+		{
+			name: "returns error when object is not a pod",
+			setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
+				configMap := &v1.ConfigMap{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "test-config",
+						Namespace: "default",
+					},
+				}
+				if err := fakeClient.Tracker().Create(podsGVR, configMap, "default"); err != nil {
+					t.Fatalf("Failed to add ConfigMap to pods resource: %v", err)
+				}
+				return configMap.Name, configMap.Namespace
+			},
+			expectHandled:    false,
+			expectError:      true,
+			errorContains:    "unable to convert object to *v1.Pod",
+			expectedCacheLen: 0,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			fakeClient := fakeclientset.NewSimpleClientset()
+			cache := newEvictedPodsCache()
+
+			name, namespace := tc.setupFnc(fakeClient)
+
+			reactionFnc := podEvictionReactionFnc(fakeClient, cache)
+
+			handled, _, err := reactionFnc(core.NewCreateSubresourceAction(
+				podsGVR,
+				name,
+				"eviction",
+				namespace,
+				&policy.Eviction{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      name,
+						Namespace: namespace,
+					},
+				},
+			))
+
+			if handled != tc.expectHandled {
+				t.Errorf("Expected handled=%v, got %v", tc.expectHandled, handled)
+			}
+
+			if tc.expectError {
+				if err == nil {
+					t.Fatal("Expected error, got nil")
+				}
+				if !strings.Contains(err.Error(), tc.errorContains) {
+					t.Errorf("Expected error message to contain '%s', got: %v", tc.errorContains, err)
+				}
+			} else {
+				if err != nil {
+					t.Errorf("Expected no error, got: %v", err)
+				}
+			}
+
+			if len(cache.list()) != tc.expectedCacheLen {
+				t.Errorf("Expected %d pods in cache, got %d", tc.expectedCacheLen, len(cache.list()))
+			}
+		})
+	}
+}
Author	SHA1	Message	Date
Kubernetes Prow Robot	cc96a3ee7a	Merge pull request #1811 from ingvagabund/kube-client-sandbox refactor(pkg/operator): replace informerResource with a kubeClientSandbox	2026-01-22 19:59:28 +05:30
Jan Chaloupka	ff580a0eff	refactor(kubeClientSandbox): keep a cache of evicted pods and allow to reset it at the end of each descheduling cycle	2026-01-22 14:49:47 +01:00
Jan Chaloupka	4af097a806	refactor(pkg/operator): create a helper for registering indexer in the dry run mode	2026-01-22 14:44:41 +01:00
Jan Chaloupka	b3f0184af8	refactor(kubeClientSandbox): helpers for creating a node selector and node selector indexer	2026-01-22 14:44:32 +01:00
Jan Chaloupka	881ead3ed2	refactor(kubeClientSandbox): set the create pods reactor in buildSandbox	2026-01-22 14:44:14 +01:00
Jan Chaloupka	fc6d0d1132	refactor(pkg/operator): replace informerResource with a kubeClientSandbox	2026-01-22 14:41:48 +01:00