mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-25 20:59:28 +01:00
Merge pull request #1811 from ingvagabund/kube-client-sandbox
refactor(pkg/operator): replace informerResource with a kubeClientSandbox
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
||||
"math"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
promapi "github.com/prometheus/client_golang/api"
|
||||
@@ -84,16 +85,61 @@ type profileRunner struct {
|
||||
descheduleEPs, balanceEPs eprunner
|
||||
}
|
||||
|
||||
// evictedPodInfo stores identifying information about a pod that was evicted during dry-run mode
|
||||
type evictedPodInfo struct {
|
||||
Namespace string
|
||||
Name string
|
||||
UID string
|
||||
}
|
||||
|
||||
// evictedPodsCache is a thread-safe cache for tracking pods evicted during dry-run mode
|
||||
type evictedPodsCache struct {
|
||||
sync.RWMutex
|
||||
pods map[string]*evictedPodInfo
|
||||
}
|
||||
|
||||
func newEvictedPodsCache() *evictedPodsCache {
|
||||
return &evictedPodsCache{
|
||||
pods: make(map[string]*evictedPodInfo),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *evictedPodsCache) add(pod *v1.Pod) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.pods[string(pod.UID)] = &evictedPodInfo{
|
||||
Namespace: pod.Namespace,
|
||||
Name: pod.Name,
|
||||
UID: string(pod.UID),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *evictedPodsCache) list() []*evictedPodInfo {
|
||||
c.RLock()
|
||||
defer c.RUnlock()
|
||||
pods := make([]*evictedPodInfo, 0, len(c.pods))
|
||||
for _, pod := range c.pods {
|
||||
podCopy := *pod
|
||||
pods = append(pods, &podCopy)
|
||||
}
|
||||
return pods
|
||||
}
|
||||
|
||||
func (c *evictedPodsCache) clear() {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.pods = make(map[string]*evictedPodInfo)
|
||||
}
|
||||
|
||||
type descheduler struct {
|
||||
rs *options.DeschedulerServer
|
||||
ir *informerResources
|
||||
kubeClientSandbox *kubeClientSandbox
|
||||
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
|
||||
sharedInformerFactory informers.SharedInformerFactory
|
||||
namespacedSecretsLister corev1listers.SecretNamespaceLister
|
||||
deschedulerPolicy *api.DeschedulerPolicy
|
||||
eventRecorder events.EventRecorder
|
||||
podEvictor *evictions.PodEvictor
|
||||
podEvictionReactionFnc func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error)
|
||||
metricsCollector *metricscollector.MetricsCollector
|
||||
prometheusClient promapi.Client
|
||||
previousPrometheusClientTransport *http.Transport
|
||||
@@ -102,34 +148,46 @@ type descheduler struct {
|
||||
metricsProviders map[api.MetricsSource]*api.MetricsProvider
|
||||
}
|
||||
|
||||
type informerResources struct {
|
||||
sharedInformerFactory informers.SharedInformerFactory
|
||||
resourceToInformer map[schema.GroupVersionResource]informers.GenericInformer
|
||||
// kubeClientSandbox creates a sandbox environment with a fake client and informer factory
|
||||
// that mirrors resources from a real client, useful for dry-run testing scenarios
|
||||
type kubeClientSandbox struct {
|
||||
client clientset.Interface
|
||||
sharedInformerFactory informers.SharedInformerFactory
|
||||
fakeKubeClient *fakeclientset.Clientset
|
||||
fakeFactory informers.SharedInformerFactory
|
||||
resourceToInformer map[schema.GroupVersionResource]informers.GenericInformer
|
||||
evictedPodsCache *evictedPodsCache
|
||||
podEvictionReactionFnc func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error)
|
||||
}
|
||||
|
||||
func newInformerResources(sharedInformerFactory informers.SharedInformerFactory) *informerResources {
|
||||
return &informerResources{
|
||||
sharedInformerFactory: sharedInformerFactory,
|
||||
resourceToInformer: make(map[schema.GroupVersionResource]informers.GenericInformer),
|
||||
func newKubeClientSandbox(client clientset.Interface, sharedInformerFactory informers.SharedInformerFactory, resources ...schema.GroupVersionResource) (*kubeClientSandbox, error) {
|
||||
sandbox := &kubeClientSandbox{
|
||||
client: client,
|
||||
sharedInformerFactory: sharedInformerFactory,
|
||||
resourceToInformer: make(map[schema.GroupVersionResource]informers.GenericInformer),
|
||||
evictedPodsCache: newEvictedPodsCache(),
|
||||
podEvictionReactionFnc: podEvictionReactionFnc,
|
||||
}
|
||||
}
|
||||
|
||||
func (ir *informerResources) Uses(resources ...schema.GroupVersionResource) error {
|
||||
for _, resource := range resources {
|
||||
informer, err := ir.sharedInformerFactory.ForResource(resource)
|
||||
informer, err := sharedInformerFactory.ForResource(resource)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ir.resourceToInformer[resource] = informer
|
||||
sandbox.resourceToInformer[resource] = informer
|
||||
}
|
||||
return nil
|
||||
|
||||
return sandbox, nil
|
||||
}
|
||||
|
||||
// CopyTo Copy informer subscriptions to the new factory and objects to the fake client so that the backing caches are populated for when listers are used.
|
||||
func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFactory informers.SharedInformerFactory) error {
|
||||
for resource, informer := range ir.resourceToInformer {
|
||||
_, err := newFactory.ForResource(resource)
|
||||
func (sandbox *kubeClientSandbox) buildSandbox() error {
|
||||
sandbox.fakeKubeClient = fakeclientset.NewSimpleClientset()
|
||||
// simulate a pod eviction by deleting a pod
|
||||
sandbox.fakeKubeClient.PrependReactor("create", "pods", sandbox.podEvictionReactionFnc(sandbox.fakeKubeClient, sandbox.evictedPodsCache))
|
||||
sandbox.fakeFactory = informers.NewSharedInformerFactory(sandbox.fakeKubeClient, 0)
|
||||
|
||||
for resource, informer := range sandbox.resourceToInformer {
|
||||
_, err := sandbox.fakeFactory.ForResource(resource)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting resource %s: %w", resource, err)
|
||||
}
|
||||
@@ -140,12 +198,67 @@ func (ir *informerResources) CopyTo(fakeClient *fakeclientset.Clientset, newFact
|
||||
}
|
||||
|
||||
for _, object := range objects {
|
||||
fakeClient.Tracker().Add(object)
|
||||
if err := sandbox.fakeKubeClient.Tracker().Add(object); err != nil {
|
||||
return fmt.Errorf("error adding object to tracker: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sandbox *kubeClientSandbox) fakeClient() *fakeclientset.Clientset {
|
||||
return sandbox.fakeKubeClient
|
||||
}
|
||||
|
||||
func (sandbox *kubeClientSandbox) fakeSharedInformerFactory() informers.SharedInformerFactory {
|
||||
return sandbox.fakeFactory
|
||||
}
|
||||
|
||||
func (sandbox *kubeClientSandbox) reset() {
|
||||
sandbox.evictedPodsCache.clear()
|
||||
}
|
||||
|
||||
func nodeSelectorFromPolicy(deschedulerPolicy *api.DeschedulerPolicy) (labels.Selector, error) {
|
||||
nodeSelector := labels.Everything()
|
||||
if deschedulerPolicy.NodeSelector != nil {
|
||||
sel, err := labels.Parse(*deschedulerPolicy.NodeSelector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeSelector = sel
|
||||
}
|
||||
return nodeSelector, nil
|
||||
}
|
||||
|
||||
func addNodeSelectorIndexer(sharedInformerFactory informers.SharedInformerFactory, nodeSelector labels.Selector) error {
|
||||
return nodeutil.AddNodeSelectorIndexer(sharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector)
|
||||
}
|
||||
|
||||
func setupInformerIndexers(sharedInformerFactory informers.SharedInformerFactory, deschedulerPolicy *api.DeschedulerPolicy) (podutil.GetPodsAssignedToNodeFunc, error) {
|
||||
// create a new instance of the shared informer factory from the cached client
|
||||
// register the pod informer, otherwise it will not get running
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(sharedInformerFactory.Core().V1().Pods().Informer())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
|
||||
// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
|
||||
// Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
|
||||
|
||||
nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getPodsAssignedToNode, nil
|
||||
}
|
||||
|
||||
func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.MetricsSource]*api.MetricsProvider {
|
||||
providersMap := make(map[api.MetricsSource]*api.MetricsProvider)
|
||||
for _, provider := range providersList {
|
||||
@@ -157,16 +270,19 @@ func metricsProviderListToMap(providersList []api.MetricsProvider) map[api.Metri
|
||||
func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory, namespacedSharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
|
||||
|
||||
ir := newInformerResources(sharedInformerFactory)
|
||||
ir.Uses(v1.SchemeGroupVersion.WithResource("pods"),
|
||||
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
|
||||
// consistent with the real runs without having to keep the list here in sync.
|
||||
kubeClientSandbox, err := newKubeClientSandbox(rs.Client, sharedInformerFactory,
|
||||
v1.SchemeGroupVersion.WithResource("pods"),
|
||||
v1.SchemeGroupVersion.WithResource("nodes"),
|
||||
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
|
||||
// consistent with the real runs without having to keep the list here in sync.
|
||||
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
|
||||
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin
|
||||
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin
|
||||
v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"), // Used by the defaultevictor plugin
|
||||
) // Used by the defaultevictor plugin
|
||||
v1.SchemeGroupVersion.WithResource("namespaces"),
|
||||
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"),
|
||||
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"),
|
||||
v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create kube client sandbox: %v", err)
|
||||
}
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
@@ -194,29 +310,24 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu
|
||||
}
|
||||
|
||||
desch := &descheduler{
|
||||
rs: rs,
|
||||
ir: ir,
|
||||
getPodsAssignedToNode: getPodsAssignedToNode,
|
||||
sharedInformerFactory: sharedInformerFactory,
|
||||
deschedulerPolicy: deschedulerPolicy,
|
||||
eventRecorder: eventRecorder,
|
||||
podEvictor: podEvictor,
|
||||
podEvictionReactionFnc: podEvictionReactionFnc,
|
||||
prometheusClient: rs.PrometheusClient,
|
||||
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
|
||||
metricsProviders: metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
|
||||
rs: rs,
|
||||
kubeClientSandbox: kubeClientSandbox,
|
||||
getPodsAssignedToNode: getPodsAssignedToNode,
|
||||
sharedInformerFactory: sharedInformerFactory,
|
||||
deschedulerPolicy: deschedulerPolicy,
|
||||
eventRecorder: eventRecorder,
|
||||
podEvictor: podEvictor,
|
||||
prometheusClient: rs.PrometheusClient,
|
||||
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: "descheduler"}),
|
||||
metricsProviders: metricsProviderListToMap(deschedulerPolicy.MetricsProviders),
|
||||
}
|
||||
|
||||
nodeSelector := labels.Everything()
|
||||
if deschedulerPolicy.NodeSelector != nil {
|
||||
sel, err := labels.Parse(*deschedulerPolicy.NodeSelector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeSelector = sel
|
||||
nodeSelector, err := nodeSelectorFromPolicy(deschedulerPolicy)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := nodeutil.AddNodeSelectorIndexer(sharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
|
||||
if err := addNodeSelectorIndexer(sharedInformerFactory, nodeSelector); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -367,46 +478,24 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
|
||||
if d.rs.DryRun {
|
||||
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
|
||||
// Create a new cache so we start from scratch without any leftovers
|
||||
fakeClient := fakeclientset.NewSimpleClientset()
|
||||
// simulate a pod eviction by deleting a pod
|
||||
fakeClient.PrependReactor("create", "pods", d.podEvictionReactionFnc(fakeClient))
|
||||
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
|
||||
err := d.ir.CopyTo(fakeClient, fakeSharedInformerFactory)
|
||||
err := d.kubeClientSandbox.buildSandbox()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// create a new instance of the shared informer factor from the cached client
|
||||
// register the pod informer, otherwise it will not get running
|
||||
d.getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods().Informer())
|
||||
getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
nodeSelector := labels.Everything()
|
||||
if d.deschedulerPolicy.NodeSelector != nil {
|
||||
sel, err := labels.Parse(*d.deschedulerPolicy.NodeSelector)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
nodeSelector = sel
|
||||
}
|
||||
// TODO(ingvagabund): copy paste all relevant indexers from the real client to the fake one
|
||||
// TODO(ingvagabund): register one indexer per each profile. Respect the precedence of no profile-level node selector is specified.
|
||||
// Also, keep a cache of node label selectors to detect duplicates to avoid creating an extra informer.
|
||||
|
||||
if err := nodeutil.AddNodeSelectorIndexer(fakeSharedInformerFactory.Core().V1().Nodes().Informer(), indexerNodeSelectorGlobal, nodeSelector); err != nil {
|
||||
return err
|
||||
}
|
||||
d.getPodsAssignedToNode = getPodsAssignedToNode
|
||||
|
||||
fakeCtx, cncl := context.WithCancel(context.TODO())
|
||||
defer cncl()
|
||||
fakeSharedInformerFactory.Start(fakeCtx.Done())
|
||||
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
|
||||
d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
|
||||
d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())
|
||||
|
||||
client = fakeClient
|
||||
d.sharedInformerFactory = fakeSharedInformerFactory
|
||||
client = d.kubeClientSandbox.fakeClient()
|
||||
d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
|
||||
} else {
|
||||
client = d.rs.Client
|
||||
}
|
||||
@@ -417,6 +506,10 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context) error {
|
||||
|
||||
d.runProfiles(ctx, client)
|
||||
|
||||
if d.rs.DryRun {
|
||||
d.kubeClientSandbox.reset()
|
||||
}
|
||||
|
||||
klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())
|
||||
|
||||
return nil
|
||||
@@ -591,7 +684,7 @@ func validateVersionCompatibility(discovery discovery.DiscoveryInterface, desche
|
||||
return nil
|
||||
}
|
||||
|
||||
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
|
||||
func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset, evictedCache *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
|
||||
return func(action core.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() == "eviction" {
|
||||
createAct, matched := action.(core.CreateActionImpl)
|
||||
@@ -602,6 +695,16 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
|
||||
if !matched {
|
||||
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
|
||||
}
|
||||
podObj, err := fakeClient.Tracker().Get(action.GetResource(), eviction.GetNamespace(), eviction.GetName())
|
||||
if err == nil {
|
||||
if pod, ok := podObj.(*v1.Pod); ok {
|
||||
evictedCache.add(pod)
|
||||
} else {
|
||||
return false, nil, fmt.Errorf("unable to convert object to *v1.Pod for %v/%v", eviction.GetNamespace(), eviction.GetName())
|
||||
}
|
||||
} else if !apierrors.IsNotFound(err) {
|
||||
return false, nil, fmt.Errorf("unable to get pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
|
||||
}
|
||||
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
|
||||
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -18,6 +19,7 @@ import (
|
||||
apiversion "k8s.io/apimachinery/pkg/version"
|
||||
fakediscovery "k8s.io/client-go/discovery/fake"
|
||||
"k8s.io/client-go/informers"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
fakeclientset "k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"k8s.io/component-base/featuregate"
|
||||
@@ -531,14 +533,10 @@ func TestPodEvictorReset(t *testing.T) {
|
||||
client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))
|
||||
|
||||
var fakeEvictedPods []string
|
||||
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
|
||||
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, nil)
|
||||
}
|
||||
|
||||
for i, cycle := range tc.cycles {
|
||||
if err := descheduler.runDeschedulerLoop(ctx); err != nil {
|
||||
t.Fatalf("Cycle %d: Unable to run a descheduling loop: %v", i+1, err)
|
||||
}
|
||||
evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, descheduler, tc.dryRun)
|
||||
fakeEvictedPods = append(fakeEvictedPods, evictedPodNames...)
|
||||
|
||||
if descheduler.podEvictor.TotalEvicted() != cycle.expectedTotalEvicted || len(evictedPods) != cycle.expectedRealEvictions || len(fakeEvictedPods) != cycle.expectedFakeEvictions {
|
||||
t.Fatalf("Cycle %d: Expected (%v,%v,%v) pods evicted, got (%v,%v,%v) instead", i+1, cycle.expectedTotalEvicted, cycle.expectedRealEvictions, cycle.expectedFakeEvictions, descheduler.podEvictor.TotalEvicted(), len(evictedPods), len(fakeEvictedPods))
|
||||
}
|
||||
@@ -547,6 +545,49 @@ func TestPodEvictorReset(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// runDeschedulerLoopAndGetEvictedPods is a temporary duplication from runDeschedulerLoop
|
||||
// that will be removed after kubeClientSandbox gets migrated to event handlers.
|
||||
func runDeschedulerLoopAndGetEvictedPods(ctx context.Context, t *testing.T, d *descheduler, dryRun bool) []string {
|
||||
var clientSet clientset.Interface
|
||||
if dryRun {
|
||||
if err := d.kubeClientSandbox.buildSandbox(); err != nil {
|
||||
t.Fatalf("Failed to build sandbox: %v", err)
|
||||
}
|
||||
|
||||
getPodsAssignedToNode, err := setupInformerIndexers(d.kubeClientSandbox.fakeSharedInformerFactory(), d.deschedulerPolicy)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to setup indexers: %v", err)
|
||||
}
|
||||
d.getPodsAssignedToNode = getPodsAssignedToNode
|
||||
|
||||
fakeCtx, cncl := context.WithCancel(context.TODO())
|
||||
defer cncl()
|
||||
d.kubeClientSandbox.fakeSharedInformerFactory().Start(fakeCtx.Done())
|
||||
d.kubeClientSandbox.fakeSharedInformerFactory().WaitForCacheSync(fakeCtx.Done())
|
||||
|
||||
clientSet = d.kubeClientSandbox.fakeClient()
|
||||
d.sharedInformerFactory = d.kubeClientSandbox.fakeSharedInformerFactory()
|
||||
} else {
|
||||
clientSet = d.rs.Client
|
||||
}
|
||||
|
||||
d.podEvictor.SetClient(clientSet)
|
||||
d.podEvictor.ResetCounters()
|
||||
|
||||
d.runProfiles(ctx, clientSet)
|
||||
|
||||
var evictedPodNames []string
|
||||
if dryRun {
|
||||
evictedPodsFromCache := d.kubeClientSandbox.evictedPodsCache.list()
|
||||
for _, pod := range evictedPodsFromCache {
|
||||
evictedPodNames = append(evictedPodNames, pod.Name)
|
||||
}
|
||||
d.kubeClientSandbox.reset()
|
||||
}
|
||||
|
||||
return evictedPodNames
|
||||
}
|
||||
|
||||
func checkTotals(t *testing.T, ctx context.Context, descheduler *descheduler, totalEvictionRequests, totalEvicted uint) {
|
||||
if total := descheduler.podEvictor.TotalEvictionRequests(); total != totalEvictionRequests {
|
||||
t.Fatalf("Expected %v total eviction requests, got %v instead", totalEvictionRequests, total)
|
||||
@@ -602,7 +643,7 @@ func TestEvictionRequestsCache(t *testing.T) {
|
||||
defer cancel()
|
||||
|
||||
var fakeEvictedPods []string
|
||||
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
|
||||
descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
|
||||
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
|
||||
}
|
||||
|
||||
@@ -743,7 +784,7 @@ func TestDeschedulingLimits(t *testing.T) {
|
||||
defer cancel()
|
||||
|
||||
var fakeEvictedPods []string
|
||||
descheduler.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
|
||||
descheduler.kubeClientSandbox.podEvictionReactionFnc = func(*fakeclientset.Clientset, *evictedPodsCache) func(action core.Action) (bool, runtime.Object, error) {
|
||||
return podEvictionReactionTestingFnc(&fakeEvictedPods, nil, podEvictionError)
|
||||
}
|
||||
|
||||
@@ -955,15 +996,11 @@ func TestNodeLabelSelectorBasedEviction(t *testing.T) {
|
||||
var evictedPods []string
|
||||
if !tc.dryRun {
|
||||
client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, nil, nil))
|
||||
} else {
|
||||
deschedulerInstance.podEvictionReactionFnc = func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error) {
|
||||
return podEvictionReactionTestingFnc(&evictedPods, nil, nil)
|
||||
}
|
||||
}
|
||||
|
||||
// Run descheduler
|
||||
if err := deschedulerInstance.runDeschedulerLoop(ctx); err != nil {
|
||||
t.Fatalf("Unable to run descheduler loop: %v", err)
|
||||
evictedPodNames := runDeschedulerLoopAndGetEvictedPods(ctx, t, deschedulerInstance, tc.dryRun)
|
||||
if tc.dryRun {
|
||||
evictedPods = evictedPodNames
|
||||
}
|
||||
|
||||
// Collect which nodes had pods evicted from them
|
||||
@@ -1082,3 +1119,375 @@ func TestLoadAwareDescheduling(t *testing.T) {
|
||||
}
|
||||
t.Logf("Total evictions: %v", totalEs)
|
||||
}
|
||||
|
||||
func TestKubeClientSandboxReset(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, test.SetRSOwnerRef)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, test.SetRSOwnerRef)
|
||||
|
||||
client := fakeclientset.NewSimpleClientset(node1, p1, p2)
|
||||
sharedInformerFactory := informers.NewSharedInformerFactoryWithOptions(client, 0)
|
||||
|
||||
// Explicitly get the informers to ensure they're registered
|
||||
_ = sharedInformerFactory.Core().V1().Pods().Informer()
|
||||
_ = sharedInformerFactory.Core().V1().Nodes().Informer()
|
||||
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
sandbox, err := newKubeClientSandbox(client, sharedInformerFactory,
|
||||
v1.SchemeGroupVersion.WithResource("pods"),
|
||||
v1.SchemeGroupVersion.WithResource("nodes"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create kubeClientSandbox: %v", err)
|
||||
}
|
||||
|
||||
if err := sandbox.buildSandbox(); err != nil {
|
||||
t.Fatalf("Failed to build sandbox: %v", err)
|
||||
}
|
||||
|
||||
eviction1 := &policy.Eviction{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: p1.Name,
|
||||
Namespace: p1.Namespace,
|
||||
},
|
||||
}
|
||||
eviction2 := &policy.Eviction{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: p2.Name,
|
||||
Namespace: p2.Namespace,
|
||||
},
|
||||
}
|
||||
|
||||
if err := sandbox.fakeClient().CoreV1().Pods(p1.Namespace).EvictV1(context.TODO(), eviction1); err != nil {
|
||||
t.Fatalf("Error evicting p1: %v", err)
|
||||
}
|
||||
if err := sandbox.fakeClient().CoreV1().Pods(p2.Namespace).EvictV1(context.TODO(), eviction2); err != nil {
|
||||
t.Fatalf("Error evicting p2: %v", err)
|
||||
}
|
||||
|
||||
evictedPods := sandbox.evictedPodsCache.list()
|
||||
if len(evictedPods) != 2 {
|
||||
t.Fatalf("Expected 2 evicted pods in cache, but got %d", len(evictedPods))
|
||||
}
|
||||
t.Logf("Evicted pods in cache before reset: %d", len(evictedPods))
|
||||
|
||||
for _, evictedPod := range evictedPods {
|
||||
if evictedPod.Namespace == "" || evictedPod.Name == "" || evictedPod.UID == "" {
|
||||
t.Errorf("Evicted pod has empty fields: namespace=%s, name=%s, uid=%s", evictedPod.Namespace, evictedPod.Name, evictedPod.UID)
|
||||
}
|
||||
t.Logf("Evicted pod: %s/%s (UID: %s)", evictedPod.Namespace, evictedPod.Name, evictedPod.UID)
|
||||
}
|
||||
|
||||
sandbox.reset()
|
||||
|
||||
evictedPodsAfterReset := sandbox.evictedPodsCache.list()
|
||||
if len(evictedPodsAfterReset) != 0 {
|
||||
t.Fatalf("Expected cache to be empty after reset, but found %d pods", len(evictedPodsAfterReset))
|
||||
}
|
||||
t.Logf("Successfully verified cache is empty after reset")
|
||||
}
|
||||
|
||||
func TestEvictedPodsCache(t *testing.T) {
|
||||
t.Run("add single pod", func(t *testing.T) {
|
||||
const (
|
||||
podName = "pod1"
|
||||
podNamespace = "default"
|
||||
podUID = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
|
||||
)
|
||||
cache := newEvictedPodsCache()
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
Namespace: podNamespace,
|
||||
UID: podUID,
|
||||
},
|
||||
}
|
||||
|
||||
cache.add(pod)
|
||||
|
||||
pods := cache.list()
|
||||
if len(pods) != 1 {
|
||||
t.Fatalf("Expected 1 pod in cache, got %d", len(pods))
|
||||
}
|
||||
if pods[0].Name != podName || pods[0].Namespace != podNamespace || pods[0].UID != podUID {
|
||||
t.Errorf("Pod data mismatch: got name=%s, namespace=%s, uid=%s", pods[0].Name, pods[0].Namespace, pods[0].UID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("add multiple pods", func(t *testing.T) {
|
||||
cache := newEvictedPodsCache()
|
||||
pods := []*v1.Pod{
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "11111111-1111-1111-1111-111111111111"}},
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "22222222-2222-2222-2222-222222222222"}},
|
||||
{ObjectMeta: metav1.ObjectMeta{Name: "pod3", Namespace: "default", UID: "33333333-3333-3333-3333-333333333333"}},
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
cache.add(pod)
|
||||
}
|
||||
|
||||
cachedPods := cache.list()
|
||||
if len(cachedPods) != 3 {
|
||||
t.Fatalf("Expected 3 pods in cache, got %d", len(cachedPods))
|
||||
}
|
||||
|
||||
podMap := make(map[string]*evictedPodInfo)
|
||||
for _, cachedPod := range cachedPods {
|
||||
podMap[cachedPod.UID] = cachedPod
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
cached, ok := podMap[string(pod.UID)]
|
||||
if !ok {
|
||||
t.Errorf("Pod with UID %s not found in cache", pod.UID)
|
||||
continue
|
||||
}
|
||||
if cached.Name != pod.Name || cached.Namespace != pod.Namespace {
|
||||
t.Errorf("Pod data mismatch for UID %s: got name=%s, namespace=%s", pod.UID, cached.Name, cached.Namespace)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("add duplicate pod updates entry", func(t *testing.T) {
|
||||
const (
|
||||
duplicateUID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
|
||||
updatedPodName = "pod1-new"
|
||||
updatedPodNS = "kube-system"
|
||||
)
|
||||
cache := newEvictedPodsCache()
|
||||
pod1 := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "pod1",
|
||||
Namespace: "default",
|
||||
UID: duplicateUID,
|
||||
},
|
||||
}
|
||||
pod2 := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: updatedPodName,
|
||||
Namespace: updatedPodNS,
|
||||
UID: duplicateUID,
|
||||
},
|
||||
}
|
||||
|
||||
cache.add(pod1)
|
||||
cache.add(pod2)
|
||||
|
||||
pods := cache.list()
|
||||
if len(pods) != 1 {
|
||||
t.Fatalf("Expected 1 pod in cache (duplicates should overwrite), got %d", len(pods))
|
||||
}
|
||||
if pods[0].Name != updatedPodName || pods[0].Namespace != updatedPodNS {
|
||||
t.Errorf("Expected pod2 data, got name=%s, namespace=%s", pods[0].Name, pods[0].Namespace)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("list returns empty array for empty cache", func(t *testing.T) {
|
||||
cache := newEvictedPodsCache()
|
||||
pods := cache.list()
|
||||
if pods == nil {
|
||||
t.Fatal("Expected non-nil slice from list()")
|
||||
}
|
||||
if len(pods) != 0 {
|
||||
t.Fatalf("Expected empty list, got %d pods", len(pods))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("list returns copies not references", func(t *testing.T) {
|
||||
const originalPodName = "pod1"
|
||||
cache := newEvictedPodsCache()
|
||||
pod := &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: originalPodName,
|
||||
Namespace: "default",
|
||||
UID: "12345678-1234-1234-1234-123456789abc",
|
||||
},
|
||||
}
|
||||
cache.add(pod)
|
||||
|
||||
pods1 := cache.list()
|
||||
pods2 := cache.list()
|
||||
|
||||
if len(pods1) != 1 || len(pods2) != 1 {
|
||||
t.Fatalf("Expected 1 pod in both lists")
|
||||
}
|
||||
|
||||
pods1[0].Name = "modified"
|
||||
|
||||
if pods2[0].Name == "modified" {
|
||||
t.Error("Modifying list result should not affect other list results (should be copies)")
|
||||
}
|
||||
|
||||
pods3 := cache.list()
|
||||
if pods3[0].Name != originalPodName {
|
||||
t.Error("Cache data was modified, list() should return copies")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("clear empties the cache", func(t *testing.T) {
|
||||
cache := newEvictedPodsCache()
|
||||
cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "aaaa0000-0000-0000-0000-000000000001"}})
|
||||
cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "bbbb0000-0000-0000-0000-000000000002"}})
|
||||
|
||||
if len(cache.list()) != 2 {
|
||||
t.Fatal("Expected 2 pods before clear")
|
||||
}
|
||||
|
||||
cache.clear()
|
||||
|
||||
pods := cache.list()
|
||||
if len(pods) != 0 {
|
||||
t.Fatalf("Expected empty cache after clear, got %d pods", len(pods))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("clear on empty cache is safe", func(t *testing.T) {
|
||||
cache := newEvictedPodsCache()
|
||||
cache.clear()
|
||||
|
||||
pods := cache.list()
|
||||
if len(pods) != 0 {
|
||||
t.Fatalf("Expected empty cache, got %d pods", len(pods))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("add after clear works correctly", func(t *testing.T) {
|
||||
cache := newEvictedPodsCache()
|
||||
cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", Namespace: "default", UID: "00000001-0001-0001-0001-000000000001"}})
|
||||
cache.clear()
|
||||
cache.add(&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod2", Namespace: "kube-system", UID: "00000002-0002-0002-0002-000000000002"}})
|
||||
|
||||
pods := cache.list()
|
||||
if len(pods) != 1 {
|
||||
t.Fatalf("Expected 1 pod after clear and add, got %d", len(pods))
|
||||
}
|
||||
if pods[0].Name != "pod2" {
|
||||
t.Errorf("Expected pod2, got %s", pods[0].Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestPodEvictionReactionFncErrorHandling(t *testing.T) {
|
||||
podsGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
setupFnc func(*fakeclientset.Clientset) (name, namespace string)
|
||||
expectHandled bool
|
||||
expectError bool
|
||||
errorContains string
|
||||
expectedCacheLen int
|
||||
}{
|
||||
{
|
||||
name: "handles pod eviction successfully and adds to cache",
|
||||
setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
|
||||
pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
|
||||
err := fakeClient.Tracker().Add(pod)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add pod: %v", err)
|
||||
}
|
||||
return pod.Name, pod.Namespace
|
||||
},
|
||||
expectHandled: true,
|
||||
expectError: false,
|
||||
expectedCacheLen: 1,
|
||||
},
|
||||
{
|
||||
name: "returns false and error when delete fails allowing other reactors to handle",
|
||||
setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
|
||||
pod := test.BuildTestPod("pod1", 100, 0, "node1", test.SetRSOwnerRef)
|
||||
if err := fakeClient.Tracker().Add(pod); err != nil {
|
||||
t.Fatalf("Failed to add pod: %v", err)
|
||||
}
|
||||
if err := fakeClient.Tracker().Delete(podsGVR, pod.Namespace, pod.Name); err != nil {
|
||||
t.Fatalf("Failed to pre-delete pod: %v", err)
|
||||
}
|
||||
return pod.Name, pod.Namespace
|
||||
},
|
||||
expectHandled: false,
|
||||
expectError: true,
|
||||
errorContains: "unable to delete pod",
|
||||
expectedCacheLen: 0,
|
||||
},
|
||||
{
|
||||
name: "returns error when pod doesn't exist in tracker from the start",
|
||||
setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
|
||||
// Don't add the pod to the tracker at all
|
||||
return "nonexistent-pod", "default"
|
||||
},
|
||||
expectHandled: false,
|
||||
expectError: true,
|
||||
errorContains: "unable to delete pod",
|
||||
expectedCacheLen: 0,
|
||||
},
|
||||
{
|
||||
name: "returns error when object is not a pod",
|
||||
setupFnc: func(fakeClient *fakeclientset.Clientset) (string, string) {
|
||||
configMap := &v1.ConfigMap{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-config",
|
||||
Namespace: "default",
|
||||
},
|
||||
}
|
||||
if err := fakeClient.Tracker().Create(podsGVR, configMap, "default"); err != nil {
|
||||
t.Fatalf("Failed to add ConfigMap to pods resource: %v", err)
|
||||
}
|
||||
return configMap.Name, configMap.Namespace
|
||||
},
|
||||
expectHandled: false,
|
||||
expectError: true,
|
||||
errorContains: "unable to convert object to *v1.Pod",
|
||||
expectedCacheLen: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
fakeClient := fakeclientset.NewSimpleClientset()
|
||||
cache := newEvictedPodsCache()
|
||||
|
||||
name, namespace := tc.setupFnc(fakeClient)
|
||||
|
||||
reactionFnc := podEvictionReactionFnc(fakeClient, cache)
|
||||
|
||||
handled, _, err := reactionFnc(core.NewCreateSubresourceAction(
|
||||
podsGVR,
|
||||
name,
|
||||
"eviction",
|
||||
namespace,
|
||||
&policy.Eviction{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: namespace,
|
||||
},
|
||||
},
|
||||
))
|
||||
|
||||
if handled != tc.expectHandled {
|
||||
t.Errorf("Expected handled=%v, got %v", tc.expectHandled, handled)
|
||||
}
|
||||
|
||||
if tc.expectError {
|
||||
if err == nil {
|
||||
t.Fatal("Expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), tc.errorContains) {
|
||||
t.Errorf("Expected error message to contain '%s', got: %v", tc.errorContains, err)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(cache.list()) != tc.expectedCacheLen {
|
||||
t.Errorf("Expected %d pods in cache, got %d", tc.expectedCacheLen, len(cache.list()))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user