Merge pull request #285 from lixiang233/Ft_change_lowUtilization_break

Change break condition and thresholds validation for lowUtilization
2026-01-26 05:14:13 +01:00 · 2020-05-28 07:36:02 -07:00
parent 003a4cdc2b 2a8dc69cbb
commit 616a9b5f6b
3 changed files with 284 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -95,7 +95,8 @@ usage is below threshold for all (cpu, memory, and number of pods), the node is
 Currently, pods request resource requirements are considered for computing node resource utilization.

 There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
-from where pods could be evicted. Any node, between the thresholds, `thresholds` and `targetThresholds` is
+from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, or number of pods),
+the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
 considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`,
 can be configured for cpu, memory, and number of pods too in terms of percentage.

@@ -120,6 +121,15 @@ strategies:
           "pods": 50
 ```

+Policy should pass the following validation checks:
+* Only three types of resources are supported: `cpu`, `memory` and `pods`.
+* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
+* The valid range of the resource's percentage value is \[0, 100\]
+* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
+
+If any of the resource types is not specified, all its thresholds default to 100% to avoid nodes going
+from underutilized to overutilized.
+
 There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
 This parameter can be configured to activate the strategy only when the number of under utilized nodes
 are above the configured value. This could be helpful in large clusters where a few nodes could go
--- a/pkg/descheduler/strategies/lownodeutilization.go
+++ b/pkg/descheduler/strategies/lownodeutilization.go
@@ -18,6 +18,7 @@ package strategies

 import (
 	"context"
+	"fmt"
 	"sort"

 	v1 "k8s.io/api/core/v1"
@@ -40,6 +41,11 @@ type NodeUsageMap struct {

 type NodePodsMap map[*v1.Node][]*v1.Pod

+const (
+	MinResourcePercentage = 0
+	MaxResourcePercentage = 100
+)
+
 func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
 	// todo: move to config validation?
 	// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
@@ -49,12 +55,23 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	}

 	thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
-	if !validateThresholds(thresholds) {
+	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
+	if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
+		klog.Errorf("LowNodeUtilization config is not valid: %v", err)
 		return
 	}
-	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-	if !validateTargetThresholds(targetThresholds) {
-		return
+	// check if Pods/CPU/Mem are set, if not, set them to 100
+	if _, ok := thresholds[v1.ResourcePods]; !ok {
+		thresholds[v1.ResourcePods] = MaxResourcePercentage
+		targetThresholds[v1.ResourcePods] = MaxResourcePercentage
+	}
+	if _, ok := thresholds[v1.ResourceCPU]; !ok {
+		thresholds[v1.ResourceCPU] = MaxResourcePercentage
+		targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
+	}
+	if _, ok := thresholds[v1.ResourceMemory]; !ok {
+		thresholds[v1.ResourceMemory] = MaxResourcePercentage
+		targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
 	}

 	npm := createNodePodsMap(ctx, client, nodes)
@@ -99,37 +116,46 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
 }

-func validateThresholds(thresholds api.ResourceThresholds) bool {
-	if thresholds == nil || len(thresholds) == 0 {
-		klog.V(1).Infof("no resource threshold is configured")
-		return false
+// validateStrategyConfig checks if the strategy's config is valid
+func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
+	// validate thresholds and targetThresholds config
+	if err := validateThresholds(thresholds); err != nil {
+		return fmt.Errorf("thresholds config is not valid: %v", err)
 	}
-	for name := range thresholds {
-		switch name {
-		case v1.ResourceCPU:
-			continue
-		case v1.ResourceMemory:
-			continue
-		case v1.ResourcePods:
-			continue
-		default:
-			klog.Errorf("only cpu, memory, or pods thresholds can be specified")
-			return false
+	if err := validateThresholds(targetThresholds); err != nil {
+		return fmt.Errorf("targetThresholds config is not valid: %v", err)
+	}
+
+	// validate if thresholds and targetThresholds have same resources configured
+	if len(thresholds) != len(targetThresholds) {
+		return fmt.Errorf("thresholds and targetThresholds configured different resources")
+	}
+	for resourceName, value := range thresholds {
+		if targetValue, ok := targetThresholds[resourceName]; !ok {
+			return fmt.Errorf("thresholds and targetThresholds configured different resources")
+		} else if value > targetValue {
+			return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
 		}
 	}
-	return true
+	return nil
 }

-//This function could be merged into above once we are clear.
-func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
-	if targetThresholds == nil {
-		klog.V(1).Infof("no target resource threshold is configured")
-		return false
-	} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
-		klog.V(1).Infof("no target resource threshold for pods is configured")
-		return false
+// validateThresholds checks if thresholds have valid resource name and resource percentage configured
+func validateThresholds(thresholds api.ResourceThresholds) error {
+	if thresholds == nil || len(thresholds) == 0 {
+		return fmt.Errorf("no resource threshold is configured")
 	}
-	return true
+	for name, percent := range thresholds {
+		switch name {
+		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
+			if percent < MinResourcePercentage || percent > MaxResourcePercentage {
+				return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
+			}
+		default:
+			return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
+		}
+	}
+	return nil
 }

 // classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
@@ -184,17 +210,13 @@ func evictPodsFromTargetNodes(
 		totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)

 		// totalCPU capacity to be moved
-		if _, ok := targetThresholds[v1.ResourceCPU]; ok {
 		cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
 		totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
-		}

 		// totalMem capacity to be moved
-		if _, ok := targetThresholds[v1.ResourceMemory]; ok {
 		memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
 		totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
 	}
-	}

 	klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
 	klog.V(1).Infof("********Number of pods evicted from each node:***********")
@@ -246,7 +268,8 @@ func evictPods(
 	taintsOfLowNodes map[string][]v1.Taint,
 	podEvictor *evictions.PodEvictor,
 	node *v1.Node) {
-	if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
+	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
+	if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && *totalPods > 0 && *totalCPU > 0 && *totalMem > 0 {
 		onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
 		for _, pod := range inputPods {
 			if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
@@ -278,8 +301,8 @@ func evictPods(
 				nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)

 				klog.V(3).Infof("updated node usage: %#v", nodeUsage)
-				// check if node utilization drops below target threshold or required capacity (cpu, memory, pods) is moved
-				if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || (*totalPods <= 0 && *totalCPU <= 0 && *totalMem <= 0) {
+				// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
+				if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || *totalPods <= 0 || *totalCPU <= 0 || *totalMem <= 0 {
 					break
 				}
 			}
--- a/pkg/descheduler/strategies/lownodeutilization_test.go
+++ b/pkg/descheduler/strategies/lownodeutilization_test.go
@@ -80,6 +80,9 @@ func TestLowNodeUtilization(t *testing.T) {
 		thresholds, targetThresholds api.ResourceThresholds
 		nodes                        map[string]*v1.Node
 		pods                         map[string]*v1.PodList
+		// TODO: divide expectedPodsEvicted into two params like other tests
+		// expectedPodsEvicted should be the result num of pods that this testCase expected but now it represents both
+		// MaxNoOfPodsToEvictPerNode and the test's expected result
 		expectedPodsEvicted int
 		evictedPods         []string
 	}{
@@ -141,6 +144,65 @@ func TestLowNodeUtilization(t *testing.T) {
 			},
 			expectedPodsEvicted: 3,
 		},
+		{
+			name: "without priorities stop when cpu capacity is depleted",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  30,
+				v1.ResourcePods: 30,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  50,
+				v1.ResourcePods: 50,
+			},
+			nodes: map[string]*v1.Node{
+				n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
+				n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
+				n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
+			},
+			pods: map[string]*v1.PodList{
+				n1NodeName: {
+					Items: []v1.Pod{
+						*test.BuildTestPod("p1", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p2", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p3", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p4", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p5", 400, 300, n1NodeName, setRSOwnerRef),
+						// These won't be evicted.
+						*test.BuildTestPod("p6", 400, 300, n1NodeName, setDSOwnerRef),
+						*test.BuildTestPod("p7", 400, 300, n1NodeName, func(pod *v1.Pod) {
+							// A pod with local storage.
+							setNormalOwnerRef(pod)
+							pod.Spec.Volumes = []v1.Volume{
+								{
+									Name: "sample",
+									VolumeSource: v1.VolumeSource{
+										HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+										EmptyDir: &v1.EmptyDirVolumeSource{
+											SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+									},
+								},
+							}
+							// A Mirror Pod.
+							pod.Annotations = test.GetMirrorPodAnnotation()
+						}),
+						*test.BuildTestPod("p8", 400, 300, n1NodeName, func(pod *v1.Pod) {
+							// A Critical Pod.
+							pod.Namespace = "kube-system"
+							priority := utils.SystemCriticalPriority
+							pod.Spec.Priority = &priority
+						}),
+					},
+				},
+				n2NodeName: {
+					Items: []v1.Pod{
+						*test.BuildTestPod("p9", 400, 2100, n1NodeName, setRSOwnerRef),
+					},
+				},
+				n3NodeName: {},
+			},
+			// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before cpu is depleted
+			expectedPodsEvicted: 3,
+		},
 		{
 			name: "with priorities",
 			thresholds: api.ResourceThresholds{
@@ -346,11 +408,6 @@ func TestLowNodeUtilization(t *testing.T) {
 				nodes = append(nodes, node)
 			}

-			npm := createNodePodsMap(ctx, fakeClient, nodes)
-			lowNodes, targetNodes := classifyNodes(npm, test.thresholds, test.targetThresholds, false)
-			if len(lowNodes) != 1 {
-				t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
-			}
 			podEvictor := evictions.NewPodEvictor(
 				fakeClient,
 				"v1",
@@ -359,7 +416,17 @@ func TestLowNodeUtilization(t *testing.T) {
 				nodes,
 			)

-			evictPodsFromTargetNodes(ctx, targetNodes, lowNodes, test.targetThresholds, false, podEvictor)
+			strategy := api.DeschedulerStrategy{
+				Enabled: true,
+				Params: api.StrategyParameters{
+					NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
+						Thresholds:       test.thresholds,
+						TargetThresholds: test.targetThresholds,
+					},
+				},
+			}
+			LowNodeUtilization(ctx, fakeClient, strategy, nodes, false, podEvictor)
+
 			podsEvicted := podEvictor.TotalEvicted()
 			if test.expectedPodsEvicted != podsEvicted {
 				t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
@@ -409,21 +476,120 @@ func TestSortPodsByPriority(t *testing.T) {
 	}
 }

+func TestValidateStrategyConfig(t *testing.T) {
+	tests := []struct {
+		name             string
+		thresholds       api.ResourceThresholds
+		targetThresholds api.ResourceThresholds
+		errInfo          error
+	}{
+		{
+			name: "passing invalid thresholds",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 120,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
+				"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
+		},
+		{
+			name: "passing invalid targetThresholds",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				"resourceInvalid": 80,
+			},
+			errInfo: fmt.Errorf("targetThresholds config is not valid: %v",
+				fmt.Errorf("only cpu, memory, or pods thresholds can be specified")),
+		},
+		{
+			name: "thresholds and targetThresholds configured different num of resources",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+				v1.ResourcePods:   80,
+			},
+			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
+		},
+		{
+			name: "thresholds and targetThresholds configured different resources",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  80,
+				v1.ResourcePods: 80,
+			},
+			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
+		},
+		{
+			name: "thresholds' CPU config value is greater than targetThresholds'",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    90,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
+		},
+		{
+			name: "passing valid strategy config",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: nil,
+		},
+	}
+
+	for _, testCase := range tests {
+		validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
+
+		if validateErr == nil || testCase.errInfo == nil {
+			if validateErr != testCase.errInfo {
+				t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
+					testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
+			}
+		} else if validateErr.Error() != testCase.errInfo.Error() {
+			t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
+				testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
+		}
+	}
+}
+
 func TestValidateThresholds(t *testing.T) {
 	tests := []struct {
 		name    string
 		input   api.ResourceThresholds
-		succeed bool
+		errInfo error
 	}{
 		{
 			name:    "passing nil map for threshold",
 			input:   nil,
-			succeed: false,
+			errInfo: fmt.Errorf("no resource threshold is configured"),
 		},
 		{
 			name:    "passing no threshold",
 			input:   api.ResourceThresholds{},
-			succeed: false,
+			errInfo: fmt.Errorf("no resource threshold is configured"),
 		},
 		{
 			name: "passing unsupported resource name",
@@ -431,7 +597,7 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceCPU:     40,
 				v1.ResourceStorage: 25.5,
 			},
-			succeed: false,
+			errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
 		},
 		{
 			name: "passing invalid resource name",
@@ -439,7 +605,30 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceCPU: 40,
 				"coolResource": 42.0,
 			},
-			succeed: false,
+			errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
+		},
+		{
+			name: "passing invalid resource value",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU:    110,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("%v threshold not in [%v, %v] range", v1.ResourceCPU, MinResourcePercentage, MaxResourcePercentage),
+		},
+		{
+			name: "passing a valid threshold with max and min resource value",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU:    100,
+				v1.ResourceMemory: 0,
+			},
+			errInfo: nil,
+		},
+		{
+			name: "passing a valid threshold with only cpu",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU: 80,
+			},
+			errInfo: nil,
 		},
 		{
 			name: "passing a valid threshold with cpu, memory and pods",
@@ -448,15 +637,19 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceMemory: 30,
 				v1.ResourcePods:   40,
 			},
-			succeed: true,
+			errInfo: nil,
 		},
 	}

 	for _, test := range tests {
-		isValid := validateThresholds(test.input)
+		validateErr := validateThresholds(test.input)

-		if isValid != test.succeed {
-			t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.succeed, isValid)
+		if validateErr == nil || test.errInfo == nil {
+			if validateErr != test.errInfo {
+				t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
+			}
+		} else if validateErr.Error() != test.errInfo.Error() {
+			t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
 		}
 	}
 }