
[nodeutilization]: actual usage client through kubernetes metrics

Jan Chaloupka
2024-11-07 16:23:16 +01:00
parent c86416612e
commit 6567f01e86
25 changed files with 1643 additions and 134 deletions
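
For context, the feature exercised by the new e2e test below reads actual node and pod usage through the Kubernetes metrics API (metrics.k8s.io) rather than relying on pod requests. A minimal standalone sketch, assuming the k8s.io/metrics clientset and a KUBECONFIG environment variable (illustrative only, not code from this commit):

package main

import (
    "context"
    "fmt"
    "os"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/tools/clientcmd"
    metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
)

func main() {
    // Build a rest.Config from the kubeconfig pointed to by $KUBECONFIG.
    config, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
    if err != nil {
        panic(err)
    }
    // The metrics clientset serves NodeMetricses and PodMetricses,
    // i.e. the "actual usage" data the nodeutilization plugins can consume.
    mc, err := metricsclient.NewForConfig(config)
    if err != nil {
        panic(err)
    }
    nodeMetrics, err := mc.MetricsV1beta1().NodeMetricses().List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        panic(err)
    }
    for _, nm := range nodeMetrics.Items {
        // Usage is a v1.ResourceList; Cpu() and Memory() return resource.Quantity values.
        fmt.Printf("%s: cpu=%s memory=%s\n", nm.Name, nm.Usage.Cpu().String(), nm.Usage.Memory().String())
    }
}

In the e2e test below, an equivalent client is created via client.CreateMetricsClient, and each test case toggles the descheduler's metrics collector to show that actual-usage-based eviction only happens when the collector is enabled.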


@@ -0,0 +1,304 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"context"
"os"
"strings"
"testing"
"time"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
componentbaseconfig "k8s.io/component-base/config"
utilptr "k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/api"
apiv1alpha2 "sigs.k8s.io/descheduler/pkg/api/v1alpha2"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization"
)
func lowNodeUtilizationPolicy(lowNodeUtilizationArgs *nodeutilization.LowNodeUtilizationArgs, evictorArgs *defaultevictor.DefaultEvictorArgs, metricsCollectorEnabled bool) *apiv1alpha2.DeschedulerPolicy {
return &apiv1alpha2.DeschedulerPolicy{
MetricsCollector: apiv1alpha2.MetricsCollector{
Enabled: metricsCollectorEnabled,
},
Profiles: []apiv1alpha2.DeschedulerProfile{
{
Name: nodeutilization.LowNodeUtilizationPluginName + "Profile",
PluginConfigs: []apiv1alpha2.PluginConfig{
{
Name: nodeutilization.LowNodeUtilizationPluginName,
Args: runtime.RawExtension{
Object: lowNodeUtilizationArgs,
},
},
{
Name: defaultevictor.PluginName,
Args: runtime.RawExtension{
Object: evictorArgs,
},
},
},
Plugins: apiv1alpha2.Plugins{
Filter: apiv1alpha2.PluginSet{
Enabled: []string{
defaultevictor.PluginName,
},
},
Balance: apiv1alpha2.PluginSet{
Enabled: []string{
nodeutilization.LowNodeUtilizationPluginName,
},
},
},
},
},
}
}
func TestLowNodeUtilizationKubernetesMetrics(t *testing.T) {
ctx := context.Background()
clientSet, err := client.CreateClient(componentbaseconfig.ClientConnectionConfiguration{Kubeconfig: os.Getenv("KUBECONFIG")}, "")
if err != nil {
t.Fatalf("Error during kubernetes client creation: %v", err)
}
metricsClient, err := client.CreateMetricsClient(componentbaseconfig.ClientConnectionConfiguration{Kubeconfig: os.Getenv("KUBECONFIG")}, "descheduler")
if err != nil {
t.Fatalf("Error during kubernetes metrics client creation: %v", err)
}
nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
t.Fatalf("Error listing nodes: %v", err)
}
_, workerNodes := splitNodesAndWorkerNodes(nodeList.Items)
testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
t.Logf("Creating testing namespace %q", testNamespace.Name)
if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
t.Fatalf("Unable to create ns %v: %v", testNamespace.Name, err)
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
t.Log("Creating duplicates pods")
testLabel := map[string]string{"app": "test-lownodeutilization-kubernetes-metrics", "name": "test-lownodeutilization-kubernetes-metrics"}
deploymentObj := buildTestDeployment("lownodeutilization-kubernetes-metrics-pod", testNamespace.Name, 0, testLabel, nil)
deploymentObj.Spec.Template.Spec.Containers[0].Image = "narmidm/k8s-pod-cpu-stressor:latest"
deploymentObj.Spec.Template.Spec.Containers[0].Args = []string{"-cpu=3", "-duration=10s", "-forever"}
deploymentObj.Spec.Template.Spec.Containers[0].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3000m"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("0m"),
},
}
tests := []struct {
name string
replicasNum int
beforeFunc func(deployment *appsv1.Deployment)
expectedEvictedPodCount int
lowNodeUtilizationArgs *nodeutilization.LowNodeUtilizationArgs
evictorArgs *defaultevictor.DefaultEvictorArgs
metricsCollectorEnabled bool
}{
{
name: "metric server not enabled",
replicasNum: 4,
beforeFunc: func(deployment *appsv1.Deployment) {
deployment.Spec.Replicas = utilptr.To[int32](4)
deployment.Spec.Template.Spec.NodeName = workerNodes[0].Name
},
expectedEvictedPodCount: 0,
lowNodeUtilizationArgs: &nodeutilization.LowNodeUtilizationArgs{
Thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
MetricsUtilization: nodeutilization.MetricsUtilization{
MetricsServer: true,
},
},
evictorArgs: &defaultevictor.DefaultEvictorArgs{},
metricsCollectorEnabled: false,
},
{
name: "requested cpu resource zero, actual cpu utilization 3 per pod",
replicasNum: 4,
beforeFunc: func(deployment *appsv1.Deployment) {
deployment.Spec.Replicas = utilptr.To[int32](4)
deployment.Spec.Template.Spec.NodeName = workerNodes[0].Name
},
expectedEvictedPodCount: 2,
lowNodeUtilizationArgs: &nodeutilization.LowNodeUtilizationArgs{
Thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
MetricsUtilization: nodeutilization.MetricsUtilization{
MetricsServer: true,
},
},
evictorArgs: &defaultevictor.DefaultEvictorArgs{},
metricsCollectorEnabled: true,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
t.Logf("Creating deployment %v in %v namespace", deploymentObj.Name, deploymentObj.Namespace)
tc.beforeFunc(deploymentObj)
_, err = clientSet.AppsV1().Deployments(deploymentObj.Namespace).Create(ctx, deploymentObj, metav1.CreateOptions{})
if err != nil {
t.Logf("Error creating deployment: %v", err)
if err = clientSet.AppsV1().Deployments(deploymentObj.Namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(deploymentObj.Labels).String(),
}); err != nil {
t.Fatalf("Unable to delete deployment: %v", err)
}
return
}
defer func() {
clientSet.AppsV1().Deployments(deploymentObj.Namespace).Delete(ctx, deploymentObj.Name, metav1.DeleteOptions{})
waitForPodsToDisappear(ctx, t, clientSet, deploymentObj.Labels, deploymentObj.Namespace)
}()
waitForPodsRunning(ctx, t, clientSet, deploymentObj.Labels, tc.replicasNum, deploymentObj.Namespace)
// wait until workerNodes[0] reports the expected actual cpu utilization, with all the testing
// pods running and producing ~12 cores in total
wait.PollUntilWithContext(ctx, 5*time.Second, func(context.Context) (done bool, err error) {
item, err := metricsClient.MetricsV1beta1().NodeMetricses().Get(ctx, workerNodes[0].Name, metav1.GetOptions{})
if err != nil {
t.Logf("Unable to get %q nodemetrics: %v", workerNodes[0].Name, err)
return false, nil
}
t.Logf("Waiting for %q nodemetrics cpu utilization to get over 12, currently %v", workerNodes[0].Name, item.Usage.Cpu().Value())
if item.Usage.Cpu().Value() < 12 {
return false, nil
}
totalCpu := resource.NewMilliQuantity(0, resource.DecimalSI)
podItems, err := metricsClient.MetricsV1beta1().PodMetricses(deploymentObj.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
t.Logf("unable to list podmetricses: %v", err)
return false, nil
}
for _, podMetrics := range podItems.Items {
for _, container := range podMetrics.Containers {
if _, exists := container.Usage[v1.ResourceCPU]; !exists {
continue
}
totalCpu.Add(container.Usage[v1.ResourceCPU])
}
}
// Value() will round up (e.g. 11.1 -> 12), which is still ok
t.Logf("Waiting for totalCpu to get to 12 at least, got %v\n", totalCpu.Value())
return totalCpu.Value() >= 12, nil
})
preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
// Deploy the descheduler with the configured policy
deschedulerPolicyConfigMapObj, err := deschedulerPolicyConfigMap(lowNodeUtilizationPolicy(tc.lowNodeUtilizationArgs, tc.evictorArgs, tc.metricsCollectorEnabled))
if err != nil {
t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
}
t.Logf("Creating %q policy CM with LowNodeUtilization configured...", deschedulerPolicyConfigMapObj.Name)
_, err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Create(ctx, deschedulerPolicyConfigMapObj, metav1.CreateOptions{})
if err != nil {
t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
}
defer func() {
t.Logf("Deleting %q CM...", deschedulerPolicyConfigMapObj.Name)
err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Delete(ctx, deschedulerPolicyConfigMapObj.Name, metav1.DeleteOptions{})
if err != nil {
t.Fatalf("Unable to delete %q CM: %v", deschedulerPolicyConfigMapObj.Name, err)
}
}()
deschedulerDeploymentObj := deschedulerDeployment(testNamespace.Name)
t.Logf("Creating descheduler deployment %v", deschedulerDeploymentObj.Name)
_, err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Create(ctx, deschedulerDeploymentObj, metav1.CreateOptions{})
if err != nil {
t.Fatalf("Error creating %q deployment: %v", deschedulerDeploymentObj.Name, err)
}
deschedulerPodName := ""
defer func() {
if deschedulerPodName != "" {
printPodLogs(ctx, t, clientSet, deschedulerPodName)
}
t.Logf("Deleting %q deployment...", deschedulerDeploymentObj.Name)
err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Delete(ctx, deschedulerDeploymentObj.Name, metav1.DeleteOptions{})
if err != nil {
t.Fatalf("Unable to delete %q deployment: %v", deschedulerDeploymentObj.Name, err)
}
waitForPodsToDisappear(ctx, t, clientSet, deschedulerDeploymentObj.Labels, deschedulerDeploymentObj.Namespace)
}()
t.Logf("Waiting for the descheduler pod running")
deschedulerPods := waitForPodsRunning(ctx, t, clientSet, deschedulerDeploymentObj.Labels, 1, deschedulerDeploymentObj.Namespace)
if len(deschedulerPods) != 0 {
deschedulerPodName = deschedulerPods[0].Name
}
// Run LowNodeUtilization plugin
var meetsExpectations bool
var actualEvictedPodCount int
if err = wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) {
currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...)
actualEvictedPod := preRunNames.Difference(currentRunNames)
actualEvictedPodCount = actualEvictedPod.Len()
t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount)
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount)
return false, nil
}
meetsExpectations = true
return true, nil
}); err != nil {
t.Errorf("Error waiting for descheduler running: %v", err)
}
if !meetsExpectations {
t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount)
} else {
t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name)
}
})
}
}


@@ -99,5 +99,10 @@ if [ -z "${SKIP_KUBEVIRT_INSTALL}" ]; then
kubectl -n kubevirt patch kubevirt kubevirt --type=merge --patch '{"spec":{"configuration":{"developerConfiguration":{"useEmulation":true}}}}'
fi
METRICS_SERVER_VERSION="v0.5.0"
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/${METRICS_SERVER_VERSION}/components.yaml
kubectl patch -n kube-system deployment metrics-server --type=json \
-p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
PRJ_PREFIX="sigs.k8s.io/descheduler"
go test ${PRJ_PREFIX}/test/e2e/ -v -timeout 0


@@ -23,17 +23,17 @@ import (
"testing"
"time"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/util/intstr"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
utilptr "k8s.io/utils/ptr"
)
@@ -89,6 +89,26 @@ func BuildTestPDB(name, appLabel string) *policyv1.PodDisruptionBudget {
return pdb
}
// BuildPodMetrics creates a test podmetrics with given parameters.
func BuildPodMetrics(name string, millicpu, mem int64) *v1beta1.PodMetrics {
return &v1beta1.PodMetrics{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: "default",
},
Window: metav1.Duration{Duration: 20010000000}, // 20.01s metrics window, expressed in nanoseconds
Containers: []v1beta1.ContainerMetrics{
{
Name: "container-1",
Usage: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(millicpu, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(mem, resource.BinarySI),
},
},
},
}
}
// GetMirrorPodAnnotation returns the annotation needed for mirror pod.
func GetMirrorPodAnnotation() map[string]string {
return map[string]string{
@@ -157,6 +177,19 @@ func BuildTestNode(name string, millicpu, mem, pods int64, apply func(*v1.Node))
return node
}
func BuildNodeMetrics(name string, millicpu, mem int64) *v1beta1.NodeMetrics {
return &v1beta1.NodeMetrics{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Window: metav1.Duration{Duration: 20010000000}, // 20.01s metrics window, expressed in nanoseconds
Usage: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(millicpu, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(mem, resource.BinarySI),
},
}
}
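// A hypothetical usage sketch (not part of this hunk): the builders above can be
// served through the generated fake metrics clientset, assumed to be imported as
// metricsfake "k8s.io/metrics/pkg/client/clientset/versioned/fake", so unit tests
// can read node/pod usage without a live metrics-server:
//
//	metricsClientset := metricsfake.NewSimpleClientset(
//		BuildNodeMetrics("n1", 3000, 3038982964), // node using ~3 cores and ~2.8Gi
//		BuildPodMetrics("p1", 400, 100000),       // pod with one container using 400m CPU
//	)
//	nodeMetrics, err := metricsClientset.MetricsV1beta1().NodeMetricses().Get(ctx, "n1", metav1.GetOptions{})
//	if err != nil {
//		t.Fatalf("unable to get fake node metrics: %v", err)
//	}
//	t.Logf("n1 usage: cpu=%s memory=%s", nodeMetrics.Usage.Cpu().String(), nodeMetrics.Usage.Memory().String())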
// MakeBestEffortPod makes the given pod a BestEffort pod
func MakeBestEffortPod(pod *v1.Pod) {
pod.Spec.Containers[0].Resources.Requests = nil