1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-25 20:59:28 +01:00

Compare commits

...

6 Commits

2 changed files with 607 additions and 95 deletions

View File

@@ -22,6 +22,7 @@ import (
"math"
"net/http"
"strconv"
"sync"
"time"
promapi "github.com/prometheus/client_golang/api"
@@ -84,16 +85,61 @@ type profileRunner struct {
descheduleEPs, balanceEPs eprunner
}
// evictedPodInfo is the minimal identity record kept for a pod that was
// evicted during dry-run mode.
type evictedPodInfo struct {
	// Namespace and Name locate the pod; UID is the string form of the pod's UID.
	Namespace, Name, UID string
}
// evictedPodsCache is a thread-safe cache for tracking pods evicted during dry-run mode.
// The embedded RWMutex guards pods; the add, list and clear methods acquire it themselves.
type evictedPodsCache struct {
sync.RWMutex
// pods maps the string form of a pod's UID to the info recorded for that pod.
pods map[string]*evictedPodInfo
}
// newEvictedPodsCache returns an empty cache whose backing map is already
// allocated, so entries can be added to it right away.
func newEvictedPodsCache() *evictedPodsCache {
	cache := new(evictedPodsCache)
	cache.pods = map[string]*evictedPodInfo{}
	return cache
}
// add records the namespace, name and UID of pod in the cache, keyed by the
// string form of the pod's UID. An existing entry with the same UID is replaced.
func (c *evictedPodsCache) add(pod *v1.Pod) {
	uid := string(pod.UID)
	info := &evictedPodInfo{Namespace: pod.Namespace, Name: pod.Name, UID: uid}

	c.Lock()
	defer c.Unlock()
	c.pods[uid] = info
}
// list returns a snapshot of the cached entries, taken under the read lock.
// Every element is a fresh copy, so callers may modify the result without
// affecting the cache. The returned slice is non-nil even when the cache is
// empty, and its order follows map iteration order.
func (c *evictedPodsCache) list() []*evictedPodInfo {
	c.RLock()
	defer c.RUnlock()

	snapshot := make([]*evictedPodInfo, 0, len(c.pods))
	for _, stored := range c.pods {
		clone := new(evictedPodInfo)
		*clone = *stored
		snapshot = append(snapshot, clone)
	}
	return snapshot
}
// clear drops every cached entry by swapping in a newly allocated map while
// holding the write lock.
func (c *evictedPodsCache) clear() {
	fresh := make(map[string]*evictedPodInfo)

	c.Lock()
	c.pods = fresh
	c.Unlock()
}
type descheduler struct {
rs *options.DeschedulerServer
ir *informerResources
kubeClientSandbox *kubeClientSandbox
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
sharedInformerFactory informers.SharedInformerFactory
namespacedSecretsLister corev1listers.SecretNamespaceLister
deschedulerPolicy *api.DeschedulerPolicy
eventRecorder events.EventRecorder
podEvictor *evictions.PodEvictor
podEvictionReactionFnc func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error)
metricsCollector *metricscollector.MetricsCollector
prometheusClient promapi.Client
previousPrometheusClientTransport *http.Transport
@@ -102,34 +148,46 @@ type descheduler struct {
metricsProviders map[api.MetricsSource]*api.MetricsProvider
}
type informerResources struct {
sharedInformerFactory informers.SharedInformerFactory
resourceToInformer map[schema.GroupVersionResource]informers.GenericInformer
// kubeClientSandbox creates a sandbox environment with a fake client and informer factory
// that mirrors resources from a real client, useful for dry-run testing scenarios
type kubeClientSandbox struct {
client clientset.Interface
sharedInformerFactory informers.SharedInformerFactory
fakeKubeClient *fakeclientset.Clientset
fakeFactory informers.SharedInformerFactory
resourceToInformer map[schema.GroupVersionResource]informers.GenericInformer
evictedPodsCache *evictedPodsCache
podEvictionReactionFnc func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error)
}
func newInformerResources(sharedInformerFactory informers.SharedInformerFactory) *informerResources {
return &informerResources{
sharedInformerFactory: sharedInformerFactory,
resourceToInformer: make(map[schema.GroupVersionResource]informers.GenericInformer),
func newKubeClientSandbox(client clientset.Interface, sharedInformerFactory informers.SharedInformerFactory, resources ...schema.GroupVersionResource) (*kubeClientSandbox, error) {
sandbox := &kubeClientSandbox{
client: client,
sharedInformerFactory: sharedInformerFactory,
resourceToInformer: make(map[schema.GroupVersionResource]informers.GenericInformer),
evictedPodsCache: newEvictedPodsCache(),
podEvictionReactionFnc: podEvictionReactionFnc,
}
}
func (ir *informerResources) Uses(resources ...schema.GroupVersionResource) error {
for _, resource := range resources {
informer, err := ir.sharedInformerFactory.ForResource(resource)
informer, err := sharedInformerFactory.ForResource(resource)
if err != nil {
return err
return nil, err
}
ir.resourceToInformer[resource] = informer
sandbox.resourceToInformer[resource] = informer
}
return nil
return sandbox, nil
}
// CopyTo copies informer subscriptions to the new factory and objects to the fake client so that the backing caches are populated when listers are used.
func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFactory informers.SharedInformerFactory) error {
for resource, informer := range ir.resourceToInformer {
_, err := newFactory.ForResource(resource)
func (sandbox *kubeClientSandbox) buildSandbox() error {
sandbox.fakeKubeClient = fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
sandbox.fakeKubeClient.PrependReactor("create", "pods", sandbox.podEvictionReactionFnc(sandbox.fakeKubeClient, sandbox.evictedPodsCache))
sandbox.fakeFactory = informers.NewSharedInformerFactory(sandbox.fakeKubeClient, 0)
for resource, informer := range sandbox.resourceToInformer {
_, err := sandbox.fakeFactory.ForResource(resource)
if err != nil {
return fmt.Errorf("error getting resource %s: %w", resource, err)
}
@@ -140,12 +198,67 @@ func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFact
}
for _, object := range objects {
fakeClient.Tracker().Add(object)
if err := sandbox.fakeKubeClient.Tracker().Add(object); err != nil {
return fmt.Errorf("error adding object to tracker: %w", err)
}
}
}
return nil
}
// fakeClient returns the fake clientset stored in fakeKubeClient.
// That field is assigned by buildSandbox.
func (sandbox *kubeClientSandbox) fakeClient() *fakeclientset.Clientset {
return sandbox.fakeKubeClient
}
// fakeSharedInformerFactory returns the informer factory stored in fakeFactory.
// That field is assigned by buildSandbox, which builds it on top of the fake clientset.
func (sandbox *kubeClientSandbox) fakeSharedInformerFactory() informers.SharedInformerFactory {
return sandbox.fakeFactory
}
// reset empties the evicted pods cache. It does not touch the fake client
// or the fake informer factory.
func (sandbox *kubeClientSandbox) reset() {
sandbox.evictedPodsCache.clear()
}
// nodeSelectorFromPolicy derives the node label selector from the policy.
// When the policy sets no NodeSelector, a selector matching everything is
// returned. Otherwise the NodeSelector string is parsed; a parse failure is
// returned as-is together with a nil selector.
func nodeSelectorFromPolicy(deschedulerPolicy *api.DeschedulerPolicy) (labels.Selector, error) {
	if deschedulerPolicy.NodeSelector == nil {
		return labels.Everything(), nil
	}

	parsed, err := labels.Parse(*deschedulerPolicy.NodeSelector)
	if err != nil {
		return nil, err
	}
	return parsed, nil
}
// addNodeSelectorIndexer registers an indexer named indexerNodeSelectorGlobal
// for nodeSelector on the factory's node informer and returns any error
// reported by nodeutil.AddNodeSelectorIndexer.
func addNodeSelectorIndexer(sharedInformerFactory informers.SharedInformerFactory, nodeSelector labels.Selector) error {
	nodeInformer := sharedInformerFactory.Core().V1().Nodes().Informer()
	return nodeutil.AddNodeSelectorIndexer(nodeInformer, indexerNodeSelectorGlobal, nodeSelector)
}
// setupInformerIndexers prepares the pod and node informers of the given
// factory for a descheduling run.
//
// It builds the "pods assigned to node" lookup function on the factory's pod
// informer and adds the node selector indexer derived from the policy to the
// factory's node informer. It returns the lookup function, or an error if
// either step (or parsing the policy's node selector) fails.
func setupInformerIndexers(sharedInformerFactory informers.SharedInformerFactory, deschedulerPolicy *api.DeschedulerPolicy) (podutil.GetPodsAssignedToNodeFunc, error) {
	// Requesting the pod informer here also registers it with the factory,
	// otherwise it will not get running.
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(sharedInformerFactory.Core().V1().Pods().Informer())
	if err != nil {
		// %w keeps the underlying error reachable through errors.Is / errors.As.
		return nil, fmt.Errorf("build get pods assigned to node function error: %w", err)
	}

	// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
	// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
	// Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
	nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
	if err != nil {
		return nil, err
	}

	if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
		return nil, err
	}

	return getPodsAssignedToNode, nil
}
func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.MetricsSource]*api.MetricsProvider {
providersMap := make(map[api.MetricsSource]*api.MetricsProvider)
for _, provider := range providersList {
@@ -157,16 +270,19 @@ func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.Metri
func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory, namespacedSharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
ir := newInformerResources(sharedInformerFactory)
ir.Uses(v1.SchemeGroupVersion.WithResource("pods"),
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
// consistent with the real runs without having to keep the list here in sync.
kubeClientSandbox, err := newKubeClientSandbox(rs.Client, sharedInformerFactory,
v1.SchemeGroupVersion.WithResource("pods"),
v1.SchemeGroupVersion.WithResource("nodes"),
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
// consistent with the real runs without having to keep the list here in sync.
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin
v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"), // Used by the defaultevictor plugin
) // Used by the defaultevictor plugin
v1.SchemeGroupVersion.WithResource("namespaces"),
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"),
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"),
v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"),
)
if err != nil {
return nil, fmt.Errorf("failed to create kube client sandbox: %v", err)
}
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
@@ -194,29 +310,24 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu
}
desch := &descheduler{
rs: rs,
ir: ir,
getPodsAssignedToNode: getPodsAssignedToNode,
sharedInformerFactory: sharedInformerFactory,
deschedulerPolicy: deschedulerPolicy,
eventRecorder: eventRecorder,
podEvictor: podEvictor,
podEvictionReactionFnc: podEvictionReactionFnc,
prometheusClient: rs.PrometheusClient,
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
metricsProviders: metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
rs: rs,
kubeClientSandbox: kubeClientSandbox,
getPodsAssignedToNode: getPodsAssignedToNode,
sharedInformerFactory: sharedInformerFactory,
deschedulerPolicy: deschedulerPolicy,
eventRecorder: eventRecorder,
podEvictor: podEvictor,
prometheusClient: rs.PrometheusClient,
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
metricsProviders: metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
}
nodeSelector := labels.Everything()
if deschedulerPolicy.NodeSelector != nil {
sel, err := labels.Parse(*deschedulerPolicy.NodeSelector)
if err != nil {
return nil, err
}
nodeSelector = sel
nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
if err != nil {
return nil, err
}
if err := nodeutil.AddNodeSelectorIndexer(sharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
return nil, err
}
@@ -367,46 +478,24 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
if d.rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
fakeClient.PrependReactor("create", "pods", d.podEvictionReactionFnc(fakeClient))
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
err := d.ir.CopyTo(fakeClient, fakeSharedInformerFactory)
err := d.kubeClientSandbox.buildSandbox()
if err != nil {
return err
}
// create a new instance of the shared informer factor from the cached client
// register the pod informer, otherwise it will not get running
d.getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods().Informer())
getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
if err != nil {
return fmt.Errorf("build get pods assigned to node function error: %v", err)
}
nodeSelector := labels.Everything()
if d.deschedulerPolicy.NodeSelector != nil {
sel, err := labels.Parse(*d.deschedulerPolicy.NodeSelector)
if err != nil {
return err
}
nodeSelector = sel
}
// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
// Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
if err := nodeutil.AddNodeSelectorIndexer(fakeSharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
return err
}
d.getPodsAssignedToNode = getPodsAssignedToNode
fakeCtx, cncl := context.WithCancel(context.TODO())
defer cncl()
fakeSharedInformerFactory.Start(fakeCtx.Done())
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())
client = fakeClient
d.sharedInformerFactory = fakeSharedInformerFactory
client = d.kubeClientSandbox.fakeClient()
d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
} else {
client = d.rs.Client
}
@@ -417,6 +506,10 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
d.runProfiles(ctx, client)
if d.rs.DryRun {
d.kubeClientSandbox.reset()
}
klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())
return nil
@@ -591,7 +684,7 @@ func validateVersionCompatibility(discovery discovery.DiscoveryInterface, desche
return nil
}
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset, evictedCache *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
return func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() == "eviction" {
createAct, matched := action.(core.CreateActionImpl)
@@ -602,6 +695,16 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
if !matched {
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
}
podObj, err := fakeClient.Tracker().Get(action.GetResource(), eviction.GetNamespace(), eviction.GetName())
if err == nil {
if pod, ok := podObj.(*v1.Pod); ok {
evictedCache.add(pod)
} else {
return false, nil, fmt.Errorf("unable to convert object to *v1.Pod for %v/%v", eviction.GetNamespace(), eviction.GetName())
}
} else if !apierrors.IsNotFound(err) {
return false, nil, fmt.Errorf("unable to get pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
}
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"math/rand"
"net/http"
"strings"
"testing"
"time"
@@ -18,6 +19,7 @@ import (
apiversion "k8s.io/apimachinery/pkg/version"
fakediscovery "k8s.io/client-go/discovery/fake"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/component-base/featuregate"
@@ -531,14 +533,10 @@ func TestPodEvictorReset(t *testing.T) {
client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))
var fakeEvictedPods []string
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, nil)
}
for i, cycle := range tc.cycles {
if err := descheduler.runDeschedulerLoop(ctx); err != nil {
t.Fatalf("Cycle %d: Unable to run a descheduling loop: %v", i+1, err)
}
evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, descheduler, tc.dryRun)
fakeEvictedPods = append(fakeEvictedPods, evictedPodNames...)
if descheduler.podEvictor.TotalEvicted() != cycle.expectedTotalEvicted || len(evictedPods) != cycle.expectedRealEvictions || len(fakeEvictedPods) != cycle.expectedFakeEvictions {
t.Fatalf("Cycle %d: Expected (%v,%v,%v) pods evicted, got (%v,%v,%v) instead", i+1, cycle.expectedTotalEvicted, cycle.expectedRealEvictions, cycle.expectedFakeEvictions, descheduler.podEvictor.TotalEvicted(), len(evictedPods), len(fakeEvictedPods))
}
@@ -547,6 +545,49 @@ func TestPodEvictorReset(t *testing.T) {
}
}
// runDeschedulerLoopAndGetEvictedPods is a temporary duplication from runDeschedulerLoop
// that will be removed after kubeClientSandbox gets migrated to event handlers.
//
// It runs a single descheduling cycle on d. With dryRun set, the cycle runs
// against a freshly built sandbox (fake client and fake informer factory) and
// the names of the pods found in the sandbox's evicted pods cache are
// returned; the cache is reset afterwards. Without dryRun the cycle runs
// against d.rs.Client and the returned slice is nil.
func runDeschedulerLoopAndGetEvictedPods(ctx context.Context, t *testing.T, d *descheduler, dryRun bool) []string {
	// Report setup failures at the calling test's line rather than inside this helper.
	t.Helper()

	var clientSet clientset.Interface
	if dryRun {
		if err := d.kubeClientSandbox.buildSandbox(); err != nil {
			t.Fatalf("Failed to build sandbox: %v", err)
		}

		getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
		if err != nil {
			t.Fatalf("Failed to setup indexers: %v", err)
		}
		d.getPodsAssignedToNode = getPodsAssignedToNode

		// The fake informers must stay running while the profiles run below,
		// so cancellation is deferred to the end of this function.
		fakeCtx, cncl := context.WithCancel(context.TODO())
		defer cncl()
		d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
		d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())

		clientSet = d.kubeClientSandbox.fakeClient()
		d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
	} else {
		clientSet = d.rs.Client
	}

	d.podEvictor.SetClient(clientSet)
	d.podEvictor.ResetCounters()

	d.runProfiles(ctx, clientSet)

	var evictedPodNames []string
	if dryRun {
		evictedPodsFromCache := d.kubeClientSandbox.evictedPodsCache.list()
		for _, pod := range evictedPodsFromCache {
			evictedPodNames = append(evictedPodNames, pod.Name)
		}
		d.kubeClientSandbox.reset()
	}

	return evictedPodNames
}
func checkTotals(t *testing.T, ctx context.Context, descheduler *descheduler, totalEvictionRequests, totalEvicted uint) {
if total := descheduler.podEvictor.TotalEvictionRequests(); total != totalEvictionRequests {
t.Fatalf("Expected %v total eviction requests, got %v instead", totalEvictionRequests, total)
@@ -602,7 +643,7 @@ func TestEvictionRequestsCache(t *testing.T) {
defer cancel()
var fakeEvictedPods []string
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
}
@@ -743,7 +784,7 @@ func TestDeschedulingLimits(t *testing.T) {
defer cancel()
var fakeEvictedPods []string
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
}
@@ -955,15 +996,11 @@ func TestNodeLabelSelectorBasedEviction(t *testing.T) {
var evictedPods []string
if !tc.dryRun {
client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))
} else {
deschedulerInstance.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
return podEvictionReactionTestingFnc(&evictedPods, nil, nil)
}
}
// Run descheduler
if err := deschedulerInstance.runDeschedulerLoop(ctx); err != nil {
t.Fatalf("Unable to run descheduler loop: %v", err)
evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, deschedulerInstance, tc.dryRun)
if tc.dryRun {
evictedPods = evictedPodNames
}
// Collect which nodes had pods evicted from them
@@ -1082,3 +1119,375 @@ func TestLoadAwareDescheduling(t *testing.T) {
}
t.Logf("Total evictions: %v", totalEs)
}
// TestKubeClientSandboxReset evicts two pods through the sandbox's fake client,
// checks that both end up in the evicted pods cache with all fields set, and
// then verifies that reset leaves the cache empty.
func TestKubeClientSandboxReset(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
	p1 := test.BuildTestPod("p1", 100, 0, node1.Name, test.SetRSOwnerRef)
	p2 := test.BuildTestPod("p2", 100, 0, node1.Name, test.SetRSOwnerRef)

	client := fakeclientset.NewSimpleClientset(node1, p1, p2)
	sharedInformerFactory := informers.NewSharedInformerFactoryWithOptions(client, 0)

	// Explicitly get the informers to ensure they're registered
	_ = sharedInformerFactory.Core().V1().Pods().Informer()
	_ = sharedInformerFactory.Core().V1().Nodes().Informer()

	sharedInformerFactory.Start(ctx.Done())
	sharedInformerFactory.WaitForCacheSync(ctx.Done())

	sandbox, err := newKubeClientSandbox(client, sharedInformerFactory,
		v1.SchemeGroupVersion.WithResource("pods"),
		v1.SchemeGroupVersion.WithResource("nodes"),
	)
	if err != nil {
		t.Fatalf("Failed to create kubeClientSandbox: %v", err)
	}
	if err := sandbox.buildSandbox(); err != nil {
		t.Fatalf("Failed to build sandbox: %v", err)
	}

	// evict submits an eviction for the given pod through the sandbox's fake client.
	evict := func(pod *v1.Pod) error {
		request := &policy.Eviction{
			ObjectMeta: metav1.ObjectMeta{
				Name:      pod.Name,
				Namespace: pod.Namespace,
			},
		}
		return sandbox.fakeClient().CoreV1().Pods(pod.Namespace).EvictV1(context.TODO(), request)
	}
	if err := evict(p1); err != nil {
		t.Fatalf("Error evicting p1: %v", err)
	}
	if err := evict(p2); err != nil {
		t.Fatalf("Error evicting p2: %v", err)
	}

	beforeReset := sandbox.evictedPodsCache.list()
	if count := len(beforeReset); count != 2 {
		t.Fatalf("Expected 2 evicted pods in cache, but got %d", count)
	}
	t.Logf("Evicted pods in cache before reset: %d", len(beforeReset))

	for _, entry := range beforeReset {
		if !(entry.Namespace != "" && entry.Name != "" && entry.UID != "") {
			t.Errorf("Evicted pod has empty fields: namespace=%s, name=%s, uid=%s", entry.Namespace, entry.Name, entry.UID)
		}
		t.Logf("Evicted pod: %s/%s (UID: %s)", entry.Namespace, entry.Name, entry.UID)
	}

	sandbox.reset()

	if remaining := sandbox.evictedPodsCache.list(); len(remaining) != 0 {
		t.Fatalf("Expected cache to be empty after reset, but found %d pods", len(remaining))
	}
	t.Logf("Successfully verified cache is empty after reset")
}
// TestEvictedPodsCache exercises add, list and clear on evictedPodsCache:
// single and multiple insertions, overwrite on a duplicate UID, the shape of
// list's result for an empty cache, copy semantics of list, and clearing.
func TestEvictedPodsCache(t *testing.T) {
	t.Run("add single pod", func(t *testing.T) {
		const (
			podName      = "pod1"
			podNamespace = "default"
			podUID       = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
		)
		cache := newEvictedPodsCache()
		cache.add(&v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: podName, Namespace: podNamespace, UID: podUID},
		})

		listed := cache.list()
		if count := len(listed); count != 1 {
			t.Fatalf("Expected 1 pod in cache, got %d", count)
		}
		entry := listed[0]
		if !(entry.Name == podName && entry.Namespace == podNamespace && entry.UID == podUID) {
			t.Errorf("Pod data mismatch: got name=%s, namespace=%s, uid=%s", entry.Name, entry.Namespace, entry.UID)
		}
	})

	t.Run("add multiple pods", func(t *testing.T) {
		cache := newEvictedPodsCache()
		input := []*v1.Pod{
			{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "11111111-1111-1111-1111-111111111111"}},
			{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "22222222-2222-2222-2222-222222222222"}},
			{ObjectMeta: metav1.ObjectMeta{Name: "pod3", Namespace: "default", UID: "33333333-3333-3333-3333-333333333333"}},
		}
		for i := range input {
			cache.add(input[i])
		}

		listed := cache.list()
		if count := len(listed); count != 3 {
			t.Fatalf("Expected 3 pods in cache, got %d", count)
		}

		// Index the listed entries by UID, since list gives no ordering guarantee.
		byUID := make(map[string]*evictedPodInfo)
		for _, entry := range listed {
			byUID[entry.UID] = entry
		}

		for _, want := range input {
			got, found := byUID[string(want.UID)]
			switch {
			case !found:
				t.Errorf("Pod with UID %s not found in cache", want.UID)
			case got.Name != want.Name || got.Namespace != want.Namespace:
				t.Errorf("Pod data mismatch for UID %s: got name=%s, namespace=%s", want.UID, got.Name, got.Namespace)
			}
		}
	})

	t.Run("add duplicate pod updates entry", func(t *testing.T) {
		const (
			duplicateUID   = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
			updatedPodName = "pod1-new"
			updatedPodNS   = "kube-system"
		)
		cache := newEvictedPodsCache()
		first := &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: duplicateUID},
		}
		second := &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: updatedPodName, Namespace: updatedPodNS, UID: duplicateUID},
		}
		for _, pod := range []*v1.Pod{first, second} {
			cache.add(pod)
		}

		listed := cache.list()
		if count := len(listed); count != 1 {
			t.Fatalf("Expected 1 pod in cache (duplicates should overwrite), got %d", count)
		}
		if entry := listed[0]; !(entry.Name == updatedPodName && entry.Namespace == updatedPodNS) {
			t.Errorf("Expected pod2 data, got name=%s, namespace=%s", entry.Name, entry.Namespace)
		}
	})

	t.Run("list returns empty array for empty cache", func(t *testing.T) {
		listed := newEvictedPodsCache().list()
		if listed == nil {
			t.Fatal("Expected non-nil slice from list()")
		}
		if count := len(listed); count != 0 {
			t.Fatalf("Expected empty list, got %d pods", count)
		}
	})

	t.Run("list returns copies not references", func(t *testing.T) {
		const originalPodName = "pod1"
		cache := newEvictedPodsCache()
		cache.add(&v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      originalPodName,
				Namespace: "default",
				UID:       "12345678-1234-1234-1234-123456789abc",
			},
		})

		firstList, secondList := cache.list(), cache.list()
		if !(len(firstList) == 1 && len(secondList) == 1) {
			t.Fatalf("Expected 1 pod in both lists")
		}

		// Mutate one snapshot; neither the other snapshot nor the cache may change.
		firstList[0].Name = "modified"
		if secondList[0].Name == "modified" {
			t.Error("Modifying list result should not affect other list results (should be copies)")
		}
		if thirdList := cache.list(); thirdList[0].Name != originalPodName {
			t.Error("Cache data was modified, list() should return copies")
		}
	})

	t.Run("clear empties the cache", func(t *testing.T) {
		cache := newEvictedPodsCache()
		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "aaaa0000-0000-0000-0000-000000000001"}})
		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "bbbb0000-0000-0000-0000-000000000002"}})
		if len(cache.list()) != 2 {
			t.Fatal("Expected 2 pods before clear")
		}

		cache.clear()

		if remaining := cache.list(); len(remaining) != 0 {
			t.Fatalf("Expected empty cache after clear, got %d pods", len(remaining))
		}
	})

	t.Run("clear on empty cache is safe", func(t *testing.T) {
		cache := newEvictedPodsCache()
		cache.clear()

		if remaining := cache.list(); len(remaining) != 0 {
			t.Fatalf("Expected empty cache, got %d pods", len(remaining))
		}
	})

	t.Run("add after clear works correctly", func(t *testing.T) {
		cache := newEvictedPodsCache()
		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "00000001-0001-0001-0001-000000000001"}})
		cache.clear()
		cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "00000002-0002-0002-0002-000000000002"}})

		listed := cache.list()
		if count := len(listed); count != 1 {
			t.Fatalf("Expected 1 pod after clear and add, got %d", count)
		}
		if name := listed[0].Name; name != "pod2" {
			t.Errorf("Expected pod2, got %s", name)
		}
	})
}
// TestPodEvictionReactionFncErrorHandling drives the reactor returned by
// podEvictionReactionFnc with eviction create actions and checks, per case,
// whether the action is reported as handled, whether an error containing the
// expected text is returned, and how many pods end up in the evicted pods cache.
func TestPodEvictionReactionFncErrorHandling(t *testing.T) {
	podsGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}

	testCases := []struct {
		name string
		// setupFnc seeds the fake client's tracker and returns the name and
		// namespace to evict. It receives the subtest's *testing.T so that a
		// setup failure is reported on, and aborts, the subtest that hit it
		// rather than the parent test.
		setupFnc         func(t *testing.T, fakeClient *fakeclientset.Clientset) (name, namespace string)
		expectHandled    bool
		expectError      bool
		errorContains    string
		expectedCacheLen int
	}{
		{
			name: "handles pod eviction successfully and adds to cache",
			setupFnc: func(t *testing.T, fakeClient *fakeclientset.Clientset) (string, string) {
				pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
				if err := fakeClient.Tracker().Add(pod); err != nil {
					t.Fatalf("Failed to add pod: %v", err)
				}
				return pod.Name, pod.Namespace
			},
			expectHandled:    true,
			expectError:      false,
			expectedCacheLen: 1,
		},
		{
			name: "returns false and error when delete fails allowing other reactors to handle",
			setupFnc: func(t *testing.T, fakeClient *fakeclientset.Clientset) (string, string) {
				pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
				if err := fakeClient.Tracker().Add(pod); err != nil {
					t.Fatalf("Failed to add pod: %v", err)
				}
				// Remove the pod again so the reactor's own delete has nothing to delete.
				if err := fakeClient.Tracker().Delete(podsGVR, pod.Namespace, pod.Name); err != nil {
					t.Fatalf("Failed to pre-delete pod: %v", err)
				}
				return pod.Name, pod.Namespace
			},
			expectHandled:    false,
			expectError:      true,
			errorContains:    "unable to delete pod",
			expectedCacheLen: 0,
		},
		{
			name: "returns error when pod doesn't exist in tracker from the start",
			setupFnc: func(*testing.T, *fakeclientset.Clientset) (string, string) {
				// Don't add the pod to the tracker at all
				return "nonexistent-pod", "default"
			},
			expectHandled:    false,
			expectError:      true,
			errorContains:    "unable to delete pod",
			expectedCacheLen: 0,
		},
		{
			name: "returns error when object is not a pod",
			setupFnc: func(t *testing.T, fakeClient *fakeclientset.Clientset) (string, string) {
				// Store a ConfigMap under the pods resource so the reactor's
				// conversion to *v1.Pod fails.
				configMap := &v1.ConfigMap{
					ObjectMeta: metav1.ObjectMeta{
						Name:      "test-config",
						Namespace: "default",
					},
				}
				if err := fakeClient.Tracker().Create(podsGVR, configMap, "default"); err != nil {
					t.Fatalf("Failed to add ConfigMap to pods resource: %v", err)
				}
				return configMap.Name, configMap.Namespace
			},
			expectHandled:    false,
			expectError:      true,
			errorContains:    "unable to convert object to *v1.Pod",
			expectedCacheLen: 0,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			fakeClient := fakeclientset.NewSimpleClientset()
			cache := newEvictedPodsCache()

			name, namespace := tc.setupFnc(t, fakeClient)

			reactionFnc := podEvictionReactionFnc(fakeClient, cache)
			handled, _, err := reactionFnc(core.NewCreateSubresourceAction(
				podsGVR,
				name,
				"eviction",
				namespace,
				&policy.Eviction{
					ObjectMeta: metav1.ObjectMeta{
						Name:      name,
						Namespace: namespace,
					},
				},
			))

			if handled != tc.expectHandled {
				t.Errorf("Expected handled=%v, got %v", tc.expectHandled, handled)
			}

			if tc.expectError {
				if err == nil {
					t.Fatal("Expected error, got nil")
				}
				if !strings.Contains(err.Error(), tc.errorContains) {
					t.Errorf("Expected error message to contain '%s', got: %v", tc.errorContains, err)
				}
			} else if err != nil {
				t.Errorf("Expected no error, got: %v", err)
			}

			if cached := len(cache.list()); cached != tc.expectedCacheLen {
				t.Errorf("Expected %d pods in cache, got %d", tc.expectedCacheLen, cached)
			}
		})
	}
}