From 2a8dc69cbb40170406506c1aeb7832f4a665fe86 Mon Sep 17 00:00:00 2001
From: lixiang
Date: Wed, 20 May 2020 15:01:30 +0800
Subject: [PATCH] Stop condition and config validation change for LowNodeUtilization

1. Set default CPU/Mem/Pods percentage of thresholds to 100
2. Stop evicting pods once any resource budget runs out
3. Add a thresholds validation method and limit resource percentages to [0, 100]
4. Update test cases and README
---
 README.md                                  |  12 +-
 .../strategies/lownodeutilization.go       | 103 +++++---
 .../strategies/lownodeutilization_test.go  | 227 ++++++++++++++++--
 3 files changed, 284 insertions(+), 58 deletions(-)

diff --git a/README.md b/README.md
index e01f3a6c9..5b47eed8d 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,8 @@ usage is below threshold for all (cpu, memory, and number of pods), the node is
 Currently, pods request resource requirements are considered for computing node resource utilization.
 
 There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
-from where pods could be evicted. Any node, between the thresholds, `thresholds` and `targetThresholds` is
+from where pods could be evicted. If a node's usage is above `targetThresholds` for any (cpu, memory, or number of pods),
+the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
 considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`, can be
 configured for cpu, memory, and number of pods too in terms of percentage.
 
@@ -114,6 +115,15 @@ strategies:
           "pods": 50
 ```
 
+The policy should pass the following validation checks:
+* Only three types of resources are supported: `cpu`, `memory` and `pods`.
+* Neither `thresholds` nor `targetThresholds` can be nil, and they must configure exactly the same types of resources.
+* The valid range of a resource's percentage value is \[0, 100\].
+* The percentage value in `thresholds` cannot be greater than the one in `targetThresholds` for the same resource.
+
+If any of the resource types is not specified, both its `thresholds` and `targetThresholds` default to 100% to avoid
+nodes going from underutilized to overutilized.
+
 There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
 This parameter can be configured to activate the strategy only when the number of under utilized nodes
 are above the configured value. This could be helpful in large clusters where a few nodes could go

diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go
index 9fed3fdfe..e95aa118d 100644
--- a/pkg/descheduler/strategies/lownodeutilization.go
+++ b/pkg/descheduler/strategies/lownodeutilization.go
@@ -18,6 +18,7 @@ package strategies
 
 import (
 	"context"
+	"fmt"
 	"sort"
 
 	v1 "k8s.io/api/core/v1"
@@ -40,6 +41,11 @@ type NodeUsageMap struct {
 
 type NodePodsMap map[*v1.Node][]*v1.Pod
 
+const (
+	MinResourcePercentage = 0
+	MaxResourcePercentage = 100
+)
+
 func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
 	if !strategy.Enabled {
 		return
@@ -52,12 +58,23 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	}
 
 	thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
-	if !validateThresholds(thresholds) {
+	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
+	if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
+		klog.Errorf("LowNodeUtilization config is not valid: %v", err)
 		return
 	}
-	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-	if !validateTargetThresholds(targetThresholds) {
-		return
+	// if a resource (pods, cpu, memory) is not configured, default its thresholds and targetThresholds to 100
+	if _, ok := thresholds[v1.ResourcePods]; !ok {
+		thresholds[v1.ResourcePods] = MaxResourcePercentage
+		targetThresholds[v1.ResourcePods] = MaxResourcePercentage
+	}
+	if _, ok := thresholds[v1.ResourceCPU]; !ok {
+		thresholds[v1.ResourceCPU] = MaxResourcePercentage
+		targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
+	}
+	if _, ok := thresholds[v1.ResourceMemory]; !ok {
+		thresholds[v1.ResourceMemory] = MaxResourcePercentage
+		targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
 	}
 
 	npm := createNodePodsMap(ctx, client, nodes)
@@ -102,37 +119,46 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
 }
 
-func validateThresholds(thresholds api.ResourceThresholds) bool {
-	if thresholds == nil || len(thresholds) == 0 {
-		klog.V(1).Infof("no resource threshold is configured")
-		return false
+// validateStrategyConfig checks if the strategy's config is valid
+func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
+	// validate thresholds and targetThresholds config
+	if err := validateThresholds(thresholds); err != nil {
+		return fmt.Errorf("thresholds config is not valid: %v", err)
 	}
-	for name := range thresholds {
-		switch name {
-		case v1.ResourceCPU:
-			continue
-		case v1.ResourceMemory:
-			continue
-		case v1.ResourcePods:
-			continue
-		default:
-			klog.Errorf("only cpu, memory, or pods thresholds can be specified")
-			return false
+	if err := validateThresholds(targetThresholds); err != nil {
+		return fmt.Errorf("targetThresholds config is not valid: %v", err)
+	}
+
+	// validate if thresholds and targetThresholds have same resources configured
+	if len(thresholds) != len(targetThresholds) {
+		return fmt.Errorf("thresholds and targetThresholds configured different resources")
+	}
+	for resourceName, value := range thresholds {
+		if targetValue, ok := targetThresholds[resourceName]; !ok {
+			return fmt.Errorf("thresholds and targetThresholds configured different resources")
+		} else if value > targetValue {
+			return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
 		}
 	}
-	return true
+	return nil
 }
 
-//This function could be merged into above once we are clear.
-func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
-	if targetThresholds == nil {
-		klog.V(1).Infof("no target resource threshold is configured")
-		return false
-	} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
-		klog.V(1).Infof("no target resource threshold for pods is configured")
-		return false
+// validateThresholds checks if thresholds have valid resource name and resource percentage configured
+func validateThresholds(thresholds api.ResourceThresholds) error {
+	if thresholds == nil || len(thresholds) == 0 {
+		return fmt.Errorf("no resource threshold is configured")
 	}
-	return true
+	for name, percent := range thresholds {
+		switch name {
+		case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
+			if percent < MinResourcePercentage || percent > MaxResourcePercentage {
+				return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
+			}
+		default:
+			return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
+		}
+	}
+	return nil
 }
 
 // classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
@@ -187,16 +213,12 @@ func evictPodsFromTargetNodes(
 		totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
 
 		// totalCPU capacity to be moved
-		if _, ok := targetThresholds[v1.ResourceCPU]; ok {
-			cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
-			totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
-		}
+		cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
+		totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
 
 		// totalMem capacity to be moved
-		if _, ok := targetThresholds[v1.ResourceMemory]; ok {
-			memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
-			totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
-		}
+		memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
+		totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
 	}
 
 	klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
@@ -249,7 +271,8 @@ func evictPods(
 	taintsOfLowNodes map[string][]v1.Taint,
 	podEvictor *evictions.PodEvictor,
 	node *v1.Node) {
-	if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
+	// stop evicting once node utilization drops below the target threshold or any of the required capacity (cpu, memory, pods) has been moved
+	if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && *totalPods > 0 && *totalCPU > 0 && *totalMem > 0 {
 		onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
 		for _, pod := range inputPods {
 			if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
@@ -280,8 +303,8 @@ func evictPods(
 				nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
 				klog.V(3).Infof("updated node usage: %#v", nodeUsage)
 
-				// check if node utilization drops below target threshold or required capacity (cpu, memory, pods) is moved
-				if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || (*totalPods <= 0 && *totalCPU <= 0 && *totalMem <= 0) {
+				// check if node utilization drops below the target threshold or any of the required capacity (cpu, memory, pods) has been moved
+				if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || *totalPods <= 0 || *totalCPU <= 0 || *totalMem <= 0 {
 					break
 				}
 			}
diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go
index 78441d715..67d770ec7 100644
--- a/pkg/descheduler/strategies/lownodeutilization_test.go
+++ b/pkg/descheduler/strategies/lownodeutilization_test.go
@@ -80,8 +80,11 @@ func TestLowNodeUtilization(t *testing.T) {
 		thresholds, targetThresholds api.ResourceThresholds
 		nodes                        map[string]*v1.Node
 		pods                         map[string]*v1.PodList
-		expectedPodsEvicted          int
-		evictedPods                  []string
+		// TODO: divide expectedPodsEvicted into two params like the other tests
+		// expectedPodsEvicted should be the number of pods this test case expects to be evicted, but for now it
+		// represents both MaxNoOfPodsToEvictPerNode and the test's expected result
+		expectedPodsEvicted int
+		evictedPods         []string
 	}{
 		{
 			name: "without priorities",
@@ -141,6 +144,65 @@ func TestLowNodeUtilization(t *testing.T) {
 			},
 			expectedPodsEvicted: 3,
 		},
+		{
+			name: "without priorities stop when cpu capacity is depleted",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  30,
+				v1.ResourcePods: 30,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  50,
+				v1.ResourcePods: 50,
+			},
+			nodes: map[string]*v1.Node{
+				n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
+				n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
+				n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
+			},
+			pods: map[string]*v1.PodList{
+				n1NodeName: {
+					Items: []v1.Pod{
+						*test.BuildTestPod("p1", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p2", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p3", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p4", 400, 300, n1NodeName, setRSOwnerRef),
+						*test.BuildTestPod("p5", 400, 300, n1NodeName, setRSOwnerRef),
+						// These won't be evicted.
+						*test.BuildTestPod("p6", 400, 300, n1NodeName, setDSOwnerRef),
+						*test.BuildTestPod("p7", 400, 300, n1NodeName, func(pod *v1.Pod) {
+							// A pod with local storage.
+							setNormalOwnerRef(pod)
+							pod.Spec.Volumes = []v1.Volume{
+								{
+									Name: "sample",
+									VolumeSource: v1.VolumeSource{
+										HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+										EmptyDir: &v1.EmptyDirVolumeSource{
+											SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+									},
+								},
+							}
+							// A Mirror Pod.
+							pod.Annotations = test.GetMirrorPodAnnotation()
+						}),
+						*test.BuildTestPod("p8", 400, 300, n1NodeName, func(pod *v1.Pod) {
+							// A Critical Pod.
+							pod.Namespace = "kube-system"
+							priority := utils.SystemCriticalPriority
+							pod.Spec.Priority = &priority
+						}),
+					},
+				},
+				n2NodeName: {
+					Items: []v1.Pod{
+						*test.BuildTestPod("p9", 400, 2100, n1NodeName, setRSOwnerRef),
+					},
+				},
+				n3NodeName: {},
+			},
+			// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before cpu is depleted
+			expectedPodsEvicted: 3,
+		},
 		{
 			name: "with priorities",
 			thresholds: api.ResourceThresholds{
@@ -346,11 +408,6 @@ func TestLowNodeUtilization(t *testing.T) {
 			nodes = append(nodes, node)
 		}
 
-		npm := createNodePodsMap(ctx, fakeClient, nodes)
-		lowNodes, targetNodes := classifyNodes(npm, test.thresholds, test.targetThresholds, false)
-		if len(lowNodes) != 1 {
-			t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
-		}
 		podEvictor := evictions.NewPodEvictor(
 			fakeClient,
 			"v1",
@@ -359,7 +416,17 @@ func TestLowNodeUtilization(t *testing.T) {
 			nodes,
 		)
 
-		evictPodsFromTargetNodes(ctx, targetNodes, lowNodes, test.targetThresholds, false, podEvictor)
+		strategy := api.DeschedulerStrategy{
+			Enabled: true,
+			Params: api.StrategyParameters{
+				NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
+					Thresholds:       test.thresholds,
+					TargetThresholds: test.targetThresholds,
+				},
+			},
+		}
+		LowNodeUtilization(ctx, fakeClient, strategy, nodes, false, podEvictor)
+
 		podsEvicted := podEvictor.TotalEvicted()
 		if test.expectedPodsEvicted != podsEvicted {
 			t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
@@ -409,21 +476,120 @@ func TestSortPodsByPriority(t *testing.T) {
 	}
 }
 
+func TestValidateStrategyConfig(t *testing.T) {
+	tests := []struct {
+		name             string
+		thresholds       api.ResourceThresholds
+		targetThresholds api.ResourceThresholds
+		errInfo          error
+	}{
+		{
+			name: "passing invalid thresholds",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 120,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
+				"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
+		},
+		{
+			name: "passing invalid targetThresholds",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				"resourceInvalid": 80,
+			},
+			errInfo: fmt.Errorf("targetThresholds config is not valid: %v",
+				fmt.Errorf("only cpu, memory, or pods thresholds can be specified")),
+		},
+		{
+			name: "thresholds and targetThresholds configured different num of resources",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+				v1.ResourcePods:   80,
+			},
+			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
+		},
+		{
+			name: "thresholds and targetThresholds configured different resources",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  80,
+				v1.ResourcePods: 80,
+			},
+			errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
+		},
+		{
+			name: "thresholds' CPU config value is greater than targetThresholds'",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    90,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
+		},
+		{
+			name: "passing valid strategy config",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    20,
+				v1.ResourceMemory: 20,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:    80,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: nil,
+		},
+	}
+
+	for _, testCase := range tests {
+		validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
+
+		if validateErr == nil || testCase.errInfo == nil {
+			if validateErr != testCase.errInfo {
+				t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
+					testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
+			}
+		} else if validateErr.Error() != testCase.errInfo.Error() {
+			t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
+				testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
+		}
+	}
+}
+
 func TestValidateThresholds(t *testing.T) {
 	tests := []struct {
 		name    string
 		input   api.ResourceThresholds
-		succeed bool
+		errInfo error
 	}{
 		{
 			name:    "passing nil map for threshold",
 			input:   nil,
-			succeed: false,
+			errInfo: fmt.Errorf("no resource threshold is configured"),
 		},
 		{
 			name:    "passing no threshold",
 			input:   api.ResourceThresholds{},
-			succeed: false,
+			errInfo: fmt.Errorf("no resource threshold is configured"),
 		},
 		{
 			name: "passing unsupported resource name",
@@ -431,7 +597,7 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceCPU:     40,
 				v1.ResourceStorage: 25.5,
 			},
-			succeed: false,
+			errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
 		},
 		{
 			name: "passing invalid resource name",
@@ -439,7 +605,30 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceCPU: 40,
 				"coolResource": 42.0,
 			},
-			succeed: false,
+			errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
 		},
+		{
+			name: "passing invalid resource value",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU:    110,
+				v1.ResourceMemory: 80,
+			},
+			errInfo: fmt.Errorf("%v threshold not in [%v, %v] range", v1.ResourceCPU, MinResourcePercentage, MaxResourcePercentage),
+		},
+		{
+			name: "passing a valid threshold with max and min resource value",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU:    100,
+				v1.ResourceMemory: 0,
+			},
+			errInfo: nil,
+		},
+		{
+			name: "passing a valid threshold with only cpu",
+			input: api.ResourceThresholds{
+				v1.ResourceCPU: 80,
+			},
+			errInfo: nil,
+		},
 		{
 			name: "passing a valid threshold with cpu, memory and pods",
@@ -448,15 +637,19 @@ func TestValidateThresholds(t *testing.T) {
 				v1.ResourceMemory: 30,
 				v1.ResourcePods:   40,
 			},
-			succeed: true,
+			errInfo: nil,
 		},
 	}
 
 	for _, test := range tests {
-		isValid := validateThresholds(test.input)
+		validateErr := validateThresholds(test.input)
 
-		if isValid != test.succeed {
-			t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.succeed, isValid)
+		if validateErr == nil || test.errInfo == nil {
+			if validateErr != test.errInfo {
+				t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
+			}
+		} else if validateErr.Error() != test.errInfo.Error() {
+			t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
 		}
 	}
 }
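
A note for reviewers: the snippet below is a minimal, self-contained Go sketch of the validation and defaulting behaviour this patch adds, handy for trying out configs outside the descheduler tree. `ResourceThresholds`, `validate` and `applyDefaults` are simplified stand-ins invented for this illustration; the real logic lives in `validateStrategyConfig`, `validateThresholds` and the defaulting block of `LowNodeUtilization`, and uses `api.ResourceThresholds` keyed by `v1.ResourceName`.

```go
package main

import "fmt"

// ResourceThresholds is a simplified stand-in for api.ResourceThresholds:
// a map from resource name to percentage.
type ResourceThresholds map[string]float64

const (
	minResourcePercentage = 0
	maxResourcePercentage = 100
)

// validate mirrors the checks added by this patch: known resource names,
// percentages within [0, 100], the same resources in both maps, and
// thresholds never above targetThresholds for the same resource.
func validate(thresholds, targetThresholds ResourceThresholds) error {
	for _, t := range []ResourceThresholds{thresholds, targetThresholds} {
		if len(t) == 0 {
			return fmt.Errorf("no resource threshold is configured")
		}
		for name, percent := range t {
			switch name {
			case "cpu", "memory", "pods":
				if percent < minResourcePercentage || percent > maxResourcePercentage {
					return fmt.Errorf("%v threshold not in [%v, %v] range", name, minResourcePercentage, maxResourcePercentage)
				}
			default:
				return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
			}
		}
	}
	if len(thresholds) != len(targetThresholds) {
		return fmt.Errorf("thresholds and targetThresholds configured different resources")
	}
	for name, value := range thresholds {
		targetValue, ok := targetThresholds[name]
		if !ok {
			return fmt.Errorf("thresholds and targetThresholds configured different resources")
		}
		if value > targetValue {
			return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", name)
		}
	}
	return nil
}

// applyDefaults mirrors the new defaulting step: a resource missing from the
// config gets 100 in both maps, so it can never push a node from
// underutilized to overutilized.
func applyDefaults(thresholds, targetThresholds ResourceThresholds) {
	for _, name := range []string{"cpu", "memory", "pods"} {
		if _, ok := thresholds[name]; !ok {
			thresholds[name] = maxResourcePercentage
			targetThresholds[name] = maxResourcePercentage
		}
	}
}

func main() {
	// Valid config that omits memory: validation passes, then memory
	// defaults to 100 in both maps.
	thresholds := ResourceThresholds{"cpu": 30, "pods": 30}
	targetThresholds := ResourceThresholds{"cpu": 50, "pods": 50}
	fmt.Println(validate(thresholds, targetThresholds)) // <nil>
	applyDefaults(thresholds, targetThresholds)
	fmt.Println(thresholds, targetThresholds)

	// Rejected: thresholds' cpu is above targetThresholds' cpu.
	fmt.Println(validate(ResourceThresholds{"cpu": 90}, ResourceThresholds{"cpu": 50}))
}
```

Running it prints `<nil>` for the valid config, shows memory defaulted to 100 in both maps, and reports the error for the config whose `thresholds` cpu exceeds its `targetThresholds` cpu.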
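The other behavioural change is the stop condition in `evictPods`: the old code kept evicting while any one of the pods/cpu/memory budgets was still positive, whereas the new code stops as soon as any one of them is exhausted (the `||` to `&&` flip in the diff above). A tiny hedged sketch of just that predicate, where `keepEvicting` is an invented helper name rather than anything in the descheduler:

```go
package main

import "fmt"

// keepEvicting mirrors the new condition: continue only while the node is
// still above its target utilization AND every capacity budget (pods, cpu,
// memory) still has something left to move. The old condition used OR on the
// budgets, so eviction could continue after one of them was already spent.
func keepEvicting(aboveTarget bool, totalPods, totalCPU, totalMem float64) bool {
	return aboveTarget && totalPods > 0 && totalCPU > 0 && totalMem > 0
}

func main() {
	// CPU budget exhausted: the new condition stops even though the pod and
	// memory budgets remain (the old OR-based condition would have continued).
	fmt.Println(keepEvicting(true, 2, 0, 512))   // false
	fmt.Println(keepEvicting(true, 2, 300, 512)) // true
}
```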