e2e: build a descheduler image and run the descheduler as a pod

2026-01-26 05:14:13 +01:00 · 2024-07-21 17:42:52 +02:00
parent 355cff67c1
commit e4c361d902
12 changed files with 1307 additions and 69 deletions
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package e2e

 import (
+	"bufio"
 	"context"
 	"fmt"
 	"math"
@@ -26,12 +27,15 @@ import (
 	"testing"
 	"time"

+	"github.com/ghodss/yaml"
+
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	schedulingv1 "k8s.io/api/scheduling/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/informers"
@@ -39,10 +43,13 @@ import (
 	listersv1 "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/events"
 	componentbaseconfig "k8s.io/component-base/config"
+	"k8s.io/klog/v2"
 	utilptr "k8s.io/utils/ptr"
+
 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/pkg/api"
 	deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
+	deschedulerapiv1alpha2 "sigs.k8s.io/descheduler/pkg/api/v1alpha2"
 	"sigs.k8s.io/descheduler/pkg/descheduler"
 	"sigs.k8s.io/descheduler/pkg/descheduler/client"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -60,6 +67,205 @@ import (
 	"sigs.k8s.io/descheduler/test"
 )

+// isClientRateLimiterError reports whether the message of err contains the
+// "client rate limiter" marker. The polling helpers in this file treat such
+// errors as transient and keep retrying instead of aborting.
+//
+// A nil err is reported as false rather than causing a nil dereference.
+func isClientRateLimiterError(err error) bool {
+	return err != nil && strings.Contains(err.Error(), "client rate limiter")
+}
+
+// deschedulerPolicyConfigMap serializes policy to YAML and wraps it in the
+// "descheduler-policy-configmap" ConfigMap of the kube-system namespace, stored
+// under the "policy.yaml" key.
+//
+// APIVersion and Kind are set on the passed-in object before marshalling, so
+// the caller's policy is modified. When marshalling fails the error is returned
+// together with a nil ConfigMap.
+func deschedulerPolicyConfigMap(policy *deschedulerapiv1alpha2.DeschedulerPolicy) (*v1.ConfigMap, error) {
+	policy.APIVersion = "descheduler/v1alpha2"
+	policy.Kind = "DeschedulerPolicy"
+
+	serialized, err := yaml.Marshal(policy)
+	if err != nil {
+		return nil, err
+	}
+
+	return &v1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "descheduler-policy-configmap",
+			Namespace: "kube-system",
+		},
+		Data: map[string]string{"policy.yaml": string(serialized)},
+	}, nil
+}
+
+// deschedulerDeployment builds the single-replica "descheduler" Deployment in
+// the kube-system namespace that runs the descheduler as a pod.
+//
+// testName is stored in the "test" label of the Deployment, of its selector and
+// of its pod template. The container image is read from the DESCHEDULER_IMAGE
+// environment variable, and the policy file is mounted at /policy-dir from the
+// "descheduler-policy-configmap" ConfigMap.
+func deschedulerDeployment(testName string) *appsv1.Deployment {
+	// Each call returns a fresh map so the three label sets stay independent.
+	newLabels := func() map[string]string {
+		return map[string]string{"app": "descheduler", "test": testName}
+	}
+
+	livenessProbe := &v1.Probe{
+		ProbeHandler: v1.ProbeHandler{
+			HTTPGet: &v1.HTTPGetAction{
+				Path:   "/healthz",
+				Port:   intstr.FromInt(10258),
+				Scheme: v1.URISchemeHTTPS,
+			},
+		},
+		InitialDelaySeconds: 3,
+		PeriodSeconds:       10,
+		FailureThreshold:    3,
+	}
+
+	container := v1.Container{
+		Name:            "descheduler",
+		Image:           os.Getenv("DESCHEDULER_IMAGE"),
+		ImagePullPolicy: "IfNotPresent",
+		Command:         []string{"/bin/descheduler"},
+		Args:            []string{"--policy-config-file", "/policy-dir/policy.yaml", "--descheduling-interval", "100m", "--v", "4"},
+		Ports:           []v1.ContainerPort{{ContainerPort: 10258, Protocol: "TCP"}},
+		LivenessProbe:   livenessProbe,
+		Resources: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceCPU:    resource.MustParse("500m"),
+				v1.ResourceMemory: resource.MustParse("256Mi"),
+			},
+		},
+		SecurityContext: &v1.SecurityContext{
+			AllowPrivilegeEscalation: utilptr.To(false),
+			Capabilities:             &v1.Capabilities{Drop: []v1.Capability{"ALL"}},
+			Privileged:               utilptr.To(false),
+			ReadOnlyRootFilesystem:   utilptr.To(true),
+			RunAsNonRoot:             utilptr.To(true),
+		},
+		VolumeMounts: []v1.VolumeMount{{MountPath: "/policy-dir", Name: "policy-volume"}},
+	}
+
+	policyVolume := v1.Volume{
+		Name: "policy-volume",
+		VolumeSource: v1.VolumeSource{
+			ConfigMap: &v1.ConfigMapVolumeSource{
+				LocalObjectReference: v1.LocalObjectReference{Name: "descheduler-policy-configmap"},
+			},
+		},
+	}
+
+	podSpec := v1.PodSpec{
+		PriorityClassName:  "system-cluster-critical",
+		ServiceAccountName: "descheduler-sa",
+		SecurityContext: &v1.PodSecurityContext{
+			RunAsNonRoot:   utilptr.To(true),
+			RunAsUser:      utilptr.To[int64](1000),
+			RunAsGroup:     utilptr.To[int64](1000),
+			SeccompProfile: &v1.SeccompProfile{Type: v1.SeccompProfileTypeRuntimeDefault},
+		},
+		Containers: []v1.Container{container},
+		Volumes:    []v1.Volume{policyVolume},
+	}
+
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "descheduler",
+			Namespace: "kube-system",
+			Labels:    newLabels(),
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: utilptr.To[int32](1),
+			Selector: &metav1.LabelSelector{MatchLabels: newLabels()},
+			Template: v1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{Labels: newLabels()},
+				Spec:       podSpec,
+			},
+		},
+	}
+}
+
+// printPodLogs streams the logs of the kube-system pod podName and prints them
+// line by line to standard output.
+//
+// It is best-effort: a failure to open or read the log stream is reported via
+// t.Logf and does not fail the test.
+func printPodLogs(ctx context.Context, t *testing.T, kubeClient clientset.Interface, podName string) {
+	request := kubeClient.CoreV1().Pods("kube-system").GetLogs(podName, &v1.PodLogOptions{})
+	// Honor the caller's context so cancelling it also aborts the log stream.
+	readCloser, err := request.Stream(ctx)
+	if err != nil {
+		t.Logf("Unable to request stream: %v\n", err)
+		return
+	}
+	defer readCloser.Close()
+
+	scanner := bufio.NewScanner(readCloser)
+	for scanner.Scan() {
+		fmt.Println(scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		t.Logf("Unable to scan bytes: %v\n", err)
+	}
+}
+
+// waitForDeschedulerPodRunning polls the kube-system namespace once a second,
+// for up to 60 seconds, until exactly one pod labelled app=descheduler and
+// test=<testName> is in the Running phase, and returns that pod's name.
+//
+// Client rate limiter errors while listing are retried; any other list error,
+// or hitting the timeout, fails the test via t.Fatalf.
+func waitForDeschedulerPodRunning(t *testing.T, ctx context.Context, kubeClient clientset.Interface, testName string) string {
+	selector := labels.SelectorFromSet(labels.Set(map[string]string{"app": "descheduler", "test": testName})).String()
+
+	var podName string
+	condition := func(ctx context.Context) (bool, error) {
+		pods, err := kubeClient.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{LabelSelector: selector})
+		if err != nil {
+			t.Logf("Unable to list pods: %v", err)
+			if isClientRateLimiterError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+
+		// Only the names of running pods are needed, so collect those directly.
+		var running []string
+		for i := range pods.Items {
+			if pods.Items[i].Status.Phase == v1.PodRunning {
+				running = append(running, pods.Items[i].Name)
+			}
+		}
+
+		if len(running) != 1 {
+			t.Logf("Expected a single running pod, got %v instead", len(running))
+			return false, nil
+		}
+
+		podName = running[0]
+		t.Logf("Found a descheduler pod running: %v", podName)
+		return true, nil
+	}
+
+	if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 60*time.Second, true, condition); err != nil {
+		t.Fatalf("Error waiting for a running descheduler: %v", err)
+	}
+	return podName
+}
+
+// waitForDeschedulerPodAbsent polls the kube-system namespace once a second,
+// for up to 60 seconds, until no pod labelled app=descheduler and
+// test=<testName> is listed any more.
+//
+// Client rate limiter errors while listing are retried; any other list error,
+// or hitting the timeout, fails the test via t.Fatalf.
+func waitForDeschedulerPodAbsent(t *testing.T, ctx context.Context, kubeClient clientset.Interface, testName string) {
+	if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
+		podList, err := kubeClient.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{
+			LabelSelector: labels.SelectorFromSet(labels.Set(map[string]string{"app": "descheduler", "test": testName})).String(),
+		})
+		if err != nil {
+			t.Logf("Unable to list pods: %v", err)
+			if isClientRateLimiterError(err) {
+				return false, nil
+			}
+			return false, err
+		}
+
+		if len(podList.Items) > 0 {
+			t.Logf("Found a descheduler pod. Waiting until it gets deleted")
+			return false, nil
+		}
+
+		return true, nil
+	}); err != nil {
+		t.Fatalf("Error waiting for a descheduler to disappear: %v", err)
+	}
+}
+
+// TestMain gates the e2e suite on the DESCHEDULER_IMAGE environment variable:
+// when it is unset or empty an error is logged and the process exits with
+// status 1; otherwise the tests run and the process exits with their status.
+func TestMain(m *testing.M) {
+	if image := os.Getenv("DESCHEDULER_IMAGE"); image == "" {
+		klog.Errorf("DESCHEDULER_IMAGE env is not set")
+		os.Exit(1)
+	}
+
+	exitCode := m.Run()
+	os.Exit(exitCode)
+}
+
 func initPluginRegistry() {
 	pluginregistry.PluginRegistry = pluginregistry.NewRegistry()
 	pluginregistry.Register(defaultevictor.PluginName, defaultevictor.New, &defaultevictor.DefaultEvictor{}, &defaultevictor.DefaultEvictorArgs{}, defaultevictor.ValidateDefaultEvictorArgs, defaultevictor.SetDefaults_DefaultEvictorArgs, pluginregistry.PluginRegistry)
--- a/test/e2e/e2e_toomanyrestarts_test.go
+++ b/test/e2e/e2e_toomanyrestarts_test.go
@@ -18,7 +18,6 @@ package e2e

 import (
 	"context"
-	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -28,48 +27,57 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	componentbaseconfig "k8s.io/component-base/config"
 	utilptr "k8s.io/utils/ptr"

 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
 	"sigs.k8s.io/descheduler/pkg/api"
-	"sigs.k8s.io/descheduler/pkg/descheduler"
+	apiv1alpha2 "sigs.k8s.io/descheduler/pkg/api/v1alpha2"
 	"sigs.k8s.io/descheduler/pkg/descheduler/client"
 	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
 	"sigs.k8s.io/descheduler/pkg/framework/plugins/removepodshavingtoomanyrestarts"
 )

-func tooManyRestartsPolicy(targetNamespace string, podRestartThresholds int32, includingInitContainers bool) *api.DeschedulerPolicy {
-	return &api.DeschedulerPolicy{
-		Profiles: []api.DeschedulerProfile{
+const deploymentReplicas = 4
+
+func tooManyRestartsPolicy(targetNamespace string, podRestartThresholds int32, includingInitContainers bool) *apiv1alpha2.DeschedulerPolicy {
+	return &apiv1alpha2.DeschedulerPolicy{
+		Profiles: []apiv1alpha2.DeschedulerProfile{
 			{
 				Name: "TooManyRestartsProfile",
-				PluginConfigs: []api.PluginConfig{
+				PluginConfigs: []apiv1alpha2.PluginConfig{
 					{
 						Name: removepodshavingtoomanyrestarts.PluginName,
-						Args: &removepodshavingtoomanyrestarts.RemovePodsHavingTooManyRestartsArgs{
-							PodRestartThreshold:     podRestartThresholds,
-							IncludingInitContainers: includingInitContainers,
-							Namespaces: &api.Namespaces{
-								Include: []string{targetNamespace},
+						Args: runtime.RawExtension{
+							Object: &removepodshavingtoomanyrestarts.RemovePodsHavingTooManyRestartsArgs{
+								PodRestartThreshold:     podRestartThresholds,
+								IncludingInitContainers: includingInitContainers,
+								Namespaces: &api.Namespaces{
+									Include: []string{targetNamespace},
+								},
 							},
 						},
 					},
 					{
 						Name: defaultevictor.PluginName,
-						Args: &defaultevictor.DefaultEvictorArgs{
-							EvictLocalStoragePods: true,
+						Args: runtime.RawExtension{
+							Object: &defaultevictor.DefaultEvictorArgs{
+								EvictLocalStoragePods: true,
+							},
 						},
 					},
 				},
-				Plugins: api.Plugins{
-					Filter: api.PluginSet{
+				Plugins: apiv1alpha2.Plugins{
+					Filter: apiv1alpha2.PluginSet{
 						Enabled: []string{
 							defaultevictor.PluginName,
 						},
 					},
-					Deschedule: api.PluginSet{
+					Deschedule: apiv1alpha2.PluginSet{
 						Enabled: []string{
 							removepodshavingtoomanyrestarts.PluginName,
 						},
@@ -103,7 +111,7 @@ func TestTooManyRestarts(t *testing.T) {
 			Labels:    map[string]string{"test": "restart-pod", "name": "test-toomanyrestarts"},
 		},
 		Spec: appsv1.DeploymentSpec{
-			Replicas: utilptr.To[int32](4),
+			Replicas: utilptr.To[int32](deploymentReplicas),
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{"test": "restart-pod", "name": "test-toomanyrestarts"},
 			},
@@ -154,18 +162,12 @@ func TestTooManyRestarts(t *testing.T) {
 	}
 	defer clientSet.AppsV1().Deployments(deploymentObj.Namespace).Delete(ctx, deploymentObj.Name, metav1.DeleteOptions{})

-	// Need to wait restartCount more than 4
-	result, err := waitPodRestartCount(ctx, clientSet, testNamespace.Name, t)
-	if err != nil {
-		t.Fatalf("Unexpected error: %v", err)
-	}
-	if !result {
-		t.Fatal("Pod restart count not as expected")
-	}
+	// Wait for 3 restarts
+	waitPodRestartCount(ctx, clientSet, testNamespace.Name, t, 3)

 	tests := []struct {
 		name                    string
-		policy                  *api.DeschedulerPolicy
+		policy                  *apiv1alpha2.DeschedulerPolicy
 		expectedEvictedPodCount uint
 	}{
 		{
@@ -175,7 +177,7 @@ func TestTooManyRestarts(t *testing.T) {
 		},
 		{
 			name:                    "test-one-evictions",
-			policy:                  tooManyRestartsPolicy(testNamespace.Name, 4, true),
+			policy:                  tooManyRestartsPolicy(testNamespace.Name, 3, true),
 			expectedEvictedPodCount: 4,
 		},
 	}
@@ -188,59 +190,95 @@ func TestTooManyRestarts(t *testing.T) {
 			rs.Client = clientSet
 			rs.EventClient = clientSet

-			preRunNames := getCurrentPodNames(t, ctx, clientSet, testNamespace.Name)
-			// Run RemovePodsHavingTooManyRestarts strategy
-			t.Log("Running RemovePodsHavingTooManyRestarts strategy")
-			err = descheduler.RunDeschedulerStrategies(ctx, rs, tc.policy, "v1")
+			preRunNames := sets.NewString(getCurrentPodNames(t, ctx, clientSet, testNamespace.Name)...)
+			// Deploy the descheduler with the configured policy
+			deschedulerPolicyConfigMapObj, err := deschedulerPolicyConfigMap(tc.policy)
 			if err != nil {
-				t.Fatalf("Failed running a descheduling cycle: %v", err)
+				t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
+			}
+			t.Logf("Creating %q policy CM with RemovePodsHavingTooManyRestarts configured...", deschedulerPolicyConfigMapObj.Name)
+			_, err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Create(ctx, deschedulerPolicyConfigMapObj, metav1.CreateOptions{})
+			if err != nil {
+				t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
 			}

-			t.Logf("Finished RemoveFailedPods strategy for %s", tc.name)
+			defer func() {
+				t.Logf("Deleting %q CM...", deschedulerPolicyConfigMapObj.Name)
+				err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Delete(ctx, deschedulerPolicyConfigMapObj.Name, metav1.DeleteOptions{})
+				if err != nil {
+					t.Fatalf("Unable to delete %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
+				}
+			}()
+
+			deschedulerDeploymentObj := deschedulerDeployment(testNamespace.Name)
+			t.Logf("Creating descheduler deployment %v", deschedulerDeploymentObj.Name)
+			_, err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Create(ctx, deschedulerDeploymentObj, metav1.CreateOptions{})
+			if err != nil {
+				t.Fatalf("Error creating %q deployment: %v", deschedulerDeploymentObj.Name, err)
+			}
+
+			deschedulerPodName := ""
+			defer func() {
+				if deschedulerPodName != "" {
+					printPodLogs(ctx, t, clientSet, deschedulerPodName)
+				}
+
+				t.Logf("Deleting %q deployment...", deschedulerDeploymentObj.Name)
+				err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Delete(ctx, deschedulerDeploymentObj.Name, metav1.DeleteOptions{})
+				if err != nil {
+					t.Fatalf("Unable to delete %q deployment: %v", deschedulerDeploymentObj.Name, err)
+				}
+				waitForDeschedulerPodAbsent(t, ctx, clientSet, testNamespace.Name)
+			}()
+
+			t.Logf("Waiting for the descheduler pod running")
+			deschedulerPodName = waitForDeschedulerPodRunning(t, ctx, clientSet, testNamespace.Name)
+
+			// Run RemovePodsHavingTooManyRestarts strategy
+			if err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 20*time.Second, true, func(ctx context.Context) (bool, error) {
+				currentRunNames := sets.NewString(getCurrentPodNames(t, ctx, clientSet, testNamespace.Name)...)
+				actualEvictedPod := preRunNames.Difference(currentRunNames)
+				actualEvictedPodCount := uint(actualEvictedPod.Len())
+				t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount)
+				if actualEvictedPodCount < tc.expectedEvictedPodCount {
+					t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount)
+					return false, nil
+				}
+
+				return true, nil
+			}); err != nil {
+				t.Errorf("Error waiting for descheduler running: %v", err)
+			}
 			waitForTerminatingPodsToDisappear(ctx, t, clientSet, testNamespace.Name)
-			afterRunNames := getCurrentPodNames(t, ctx, clientSet, testNamespace.Name)
-			namesInCommonCount := len(intersectStrings(preRunNames, afterRunNames))
-
-			t.Logf("preRunNames: %v, afterRunNames: %v, namesInCommonLen: %v\n", preRunNames, afterRunNames, namesInCommonCount)
-			actualEvictedPodCount := uint(len(afterRunNames) - namesInCommonCount)
-			if actualEvictedPodCount < tc.expectedEvictedPodCount {
-				t.Errorf("Test error for description: %s. Unexpected number of pods have been evicted, got %v, expected %v", tc.name, actualEvictedPodCount, tc.expectedEvictedPodCount)
-			}
 		})
 	}
 }

-func waitPodRestartCount(ctx context.Context, clientSet clientset.Interface, namespace string, t *testing.T) (bool, error) {
-	timeout := time.After(5 * time.Minute)
-	tick := time.Tick(5 * time.Second)
-	for {
-		select {
-		case <-timeout:
-			t.Log("Timeout, still restart count not as expected")
-			return false, fmt.Errorf("timeout Error")
-		case <-tick:
-			podList, err := clientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
-				LabelSelector: labels.SelectorFromSet(labels.Set(map[string]string{"test": "restart-pod", "name": "test-toomanyrestarts"})).String(),
-			})
-			if err != nil {
-				t.Fatalf("Unexpected err: %v", err)
-				return false, err
-			}
-			if len(podList.Items) < 4 {
-				t.Log("Waiting for 4 pods")
+// waitPodRestartCount blocks until the first deploymentReplicas pods labelled
+// test=restart-pod,name=test-toomanyrestarts in namespace each report at least
+// expectedNumberOfRestarts restarts on their first container status.
+//
+// It polls every 5 seconds for up to 5 minutes and fails the test via t.Fatalf
+// on a list error or when the timeout expires.
+func waitPodRestartCount(ctx context.Context, clientSet clientset.Interface, namespace string, t *testing.T, expectedNumberOfRestarts int) {
+	if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
+		podList, err := clientSet.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+			LabelSelector: labels.SelectorFromSet(labels.Set(map[string]string{"test": "restart-pod", "name": "test-toomanyrestarts"})).String(),
+		})
+		if err != nil {
+			t.Fatalf("Unexpected err: %v", err)
+			return false, err
+		}
+		// Guard on the number of pods, not on the restart threshold, so the
+		// indexed loop below can never run past the end of podList.Items.
+		if len(podList.Items) < deploymentReplicas {
+			t.Logf("Waiting for %v pods, got %v", deploymentReplicas, len(podList.Items))
+			return false, nil
+		}
+		for i := 0; i < deploymentReplicas; i++ {
+			if len(podList.Items[i].Status.ContainerStatuses) < 1 {
+				t.Logf("Waiting for podList.Items[%v].Status.ContainerStatuses to be populated", i)
 				return false, nil
 			}
-			for i := 0; i < 4; i++ {
-				if len(podList.Items[0].Status.ContainerStatuses) < 1 {
-					t.Logf("Waiting for podList.Items[%v].Status.ContainerStatuses to be populated", i)
-					return false, nil
-				}
-			}
-
-			if podList.Items[0].Status.ContainerStatuses[0].RestartCount >= 4 && podList.Items[1].Status.ContainerStatuses[0].RestartCount >= 4 && podList.Items[2].Status.ContainerStatuses[0].RestartCount >= 4 && podList.Items[3].Status.ContainerStatuses[0].RestartCount >= 4 {
-				t.Log("Pod restartCount as expected")
-				return true, nil
+			if podList.Items[i].Status.ContainerStatuses[0].RestartCount < int32(expectedNumberOfRestarts) {
+				t.Logf("podList.Items[%v].Status.ContainerStatuses[0].RestartCount (%v) < %v", i, podList.Items[i].Status.ContainerStatuses[0].RestartCount, expectedNumberOfRestarts)
+				return false, nil
 			}
 		}
+		return true, nil
+	}); err != nil {
+		t.Fatalf("Error waiting for a workload running: %v", err)
 	}
 }