From 8ebf3fb32394d5a01d640d4935c5fee2fe4c79a1 Mon Sep 17 00:00:00 2001
From: Jan Chaloupka
Date: Fri, 11 Mar 2022 12:08:30 +0100
Subject: [PATCH 1/2] nodeutilization: move node resource threshold value computation under a separate function

---
 .../nodeutilization/nodeutilization.go        | 42 +++++++++++++------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
index d53d24a13..a96baa6a0 100644
--- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
@@ -92,33 +92,28 @@ func getNodeUsage(
 			continue
 		}
 
-		// A threshold is in percentages but in <0;100> interval.
-		// Performing `threshold * 0.01` will convert <0;100> interval into <0;1>.
-		// Multiplying it with capacity will give fraction of the capacity corresponding to the given high/low resource threshold in Quantity units.
 		nodeCapacity := node.Status.Capacity
 		if len(node.Status.Allocatable) > 0 {
 			nodeCapacity = node.Status.Allocatable
 		}
 		lowResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resource.NewMilliQuantity(int64(float64(lowThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
-			v1.ResourceMemory: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
-			v1.ResourcePods:   resource.NewQuantity(int64(float64(lowThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
+			v1.ResourceCPU:    resourceThreshold(nodeCapacity, v1.ResourceCPU, lowThreshold[v1.ResourceCPU]),
+			v1.ResourceMemory: resourceThreshold(nodeCapacity, v1.ResourceMemory, lowThreshold[v1.ResourceMemory]),
+			v1.ResourcePods:   resourceThreshold(nodeCapacity, v1.ResourcePods, lowThreshold[v1.ResourcePods]),
 		}
 		for _, name := range resourceNames {
 			if !isBasicResource(name) {
-				cap := nodeCapacity[name]
-				lowResourceThreshold[name] = resource.NewQuantity(int64(float64(lowThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
+				lowResourceThreshold[name] = resourceThreshold(nodeCapacity, name, lowThreshold[name])
 			}
 		}
 		highResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resource.NewMilliQuantity(int64(float64(highThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
-			v1.ResourceMemory: resource.NewQuantity(int64(float64(highThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
-			v1.ResourcePods:   resource.NewQuantity(int64(float64(highThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
+			v1.ResourceCPU:    resourceThreshold(nodeCapacity, v1.ResourceCPU, highThreshold[v1.ResourceCPU]),
+			v1.ResourceMemory: resourceThreshold(nodeCapacity, v1.ResourceMemory, highThreshold[v1.ResourceMemory]),
+			v1.ResourcePods:   resourceThreshold(nodeCapacity, v1.ResourcePods, highThreshold[v1.ResourcePods]),
 		}
 		for _, name := range resourceNames {
 			if !isBasicResource(name) {
-				cap := nodeCapacity[name]
-				highResourceThreshold[name] = resource.NewQuantity(int64(float64(highThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
+				highResourceThreshold[name] = resourceThreshold(nodeCapacity, name, highThreshold[name])
 			}
 		}
 
@@ -134,6 +129,27 @@ func getNodeUsage(
 	return nodeUsageList
 }
 
+func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity {
+	defaultFormat := resource.DecimalSI
+	if resourceName == v1.ResourceMemory {
+		defaultFormat = resource.BinarySI
+	}
+
+	resourceCapacityFraction := func(resourceNodeCapacity int64) int64 {
+		// A threshold is in percentages but in <0;100> interval.
+		// Performing `threshold * 0.01` will convert <0;100> interval into <0;1>.
+		// Multiplying it with capacity will give fraction of the capacity corresponding to the given resource threshold in Quantity units.
+		return int64(float64(threshold) * 0.01 * float64(resourceNodeCapacity))
+	}
+
+	resourceCapacityQuantity := nodeCapacity.Name(resourceName, defaultFormat)
+
+	if resourceName == v1.ResourceCPU {
+		return resource.NewMilliQuantity(resourceCapacityFraction(resourceCapacityQuantity.MilliValue()), defaultFormat)
+	}
+	return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat)
+}
+
 func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
 	nodeCapacity := nodeUsage.node.Status.Capacity
 	if len(nodeUsage.node.Status.Allocatable) > 0 {
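For illustration, the conversion the new resourceThreshold helper performs can be reproduced standalone (a sketch using only k8s.io/apimachinery; the capacity and percentage values are made up): a CPU threshold of 40 on a node with 4 allocatable CPUs comes out as 1600m, because CPU is computed over MilliValue() while memory and pods use Value().

	package main

	import (
		"fmt"

		"k8s.io/apimachinery/pkg/api/resource"
	)

	func main() {
		capacity := resource.MustParse("4") // 4 allocatable CPUs
		threshold := 40.0                   // percentage in the <0;100> interval

		// Same arithmetic as resourceThreshold: threshold * 0.01 * capacity,
		// computed over MilliValue() because the resource is CPU.
		milli := int64(threshold * 0.01 * float64(capacity.MilliValue()))
		fmt.Println(resource.NewMilliQuantity(milli, resource.DecimalSI)) // prints "1600m"
	}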
From 52ff50f2d17ca41d7d50318148b8496c3a96057c Mon Sep 17 00:00:00 2001
From: Jan Chaloupka
Date: Fri, 11 Mar 2022 13:31:04 +0100
Subject: [PATCH 2/2] Promote NodeUsage to NodeInfo, evaluate thresholds separately

---
 .../nodeutilization/highnodeutilization.go    |  13 +-
 .../nodeutilization/lownodeutilization.go     |  15 +-
 .../nodeutilization/nodeutilization.go        | 128 ++++++++++--------
 3 files changed, 84 insertions(+), 72 deletions(-)

diff --git a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go b/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go
index 4a340b8fb..aa90c317b 100644
--- a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go
@@ -63,16 +63,17 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
 	resourceNames := getResourceNames(targetThresholds)
 
 	sourceNodes, highNodes := classifyNodes(
-		getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
-		func(node *v1.Node, usage NodeUsage) bool {
-			return isNodeWithLowUtilization(usage)
+		getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
+		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
+			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		},
-		func(node *v1.Node, usage NodeUsage) bool {
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(node) {
 				klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
 				return false
 			}
-			return !isNodeWithLowUtilization(usage)
+			return !isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		})
 
 	// log message in one line
@@ -110,7 +111,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
 	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
 	// stop if the total available usage has dropped to zero - no more pods can be scheduled
-	continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
 		for name := range totalAvailableUsage {
 			if totalAvailableUsage[name].CmpInt64(0) < 1 {
 				return false
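With thresholds split out, each strategy now computes usage and thresholds independently and hands both to classifyNodes. A condensed sketch of the new calling convention (variable names as in the hunk above, assumed in scope; the predicates are the ones visible in the diff):

	usages := getNodeUsage(nodes, resourceNames, getPodsAssignedToNode)
	nodeThresholds := getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames)
	lowNodes, highNodes := classifyNodes(usages, nodeThresholds,
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			// underutilized: every tracked resource below its low threshold
			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
		},
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			// overutilized: at least one resource above its high threshold
			return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
		})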
diff --git a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go b/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go
index 725d7910a..dde109359 100644
--- a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go
@@ -73,17 +73,18 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	resourceNames := getResourceNames(thresholds)
 
 	lowNodes, sourceNodes := classifyNodes(
-		getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
+		getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
+		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
 		// The node has to be schedulable (to be able to move workload there)
-		func(node *v1.Node, usage NodeUsage) bool {
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(node) {
 				klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
 				return false
 			}
-			return isNodeWithLowUtilization(usage)
+			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		},
-		func(node *v1.Node, usage NodeUsage) bool {
-			return isNodeAboveTargetUtilization(usage)
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
+			return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
 		},
 	)
 
@@ -138,8 +139,8 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
 	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
-	continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
-		if !isNodeAboveTargetUtilization(nodeUsage) {
+	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
 			return false
 		}
 		for name := range totalAvailableUsage {
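Assembled from the hunk above plus the pre-existing loop tail, LowNodeUtilization's full stop condition now reads roughly as follows (a sketch; the loop body and trailing return come from the unchanged part of the function):

	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
			return false // the node already dropped below the target threshold
		}
		for name := range totalAvailableUsage {
			if totalAvailableUsage[name].CmpInt64(0) < 1 {
				return false // no spare capacity left for this resource on the underutilized nodes
			}
		}
		return true
	}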
diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
index a96baa6a0..f9259de34 100644
--- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
@@ -36,12 +36,19 @@ type NodeUsage struct {
 	node    *v1.Node
 	usage   map[v1.ResourceName]*resource.Quantity
 	allPods []*v1.Pod
+}
 
+type NodeThresholds struct {
 	lowResourceThreshold  map[v1.ResourceName]*resource.Quantity
 	highResourceThreshold map[v1.ResourceName]*resource.Quantity
 }
 
-type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
+type NodeInfo struct {
+	NodeUsage
+	thresholds NodeThresholds
+}
+
+type continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
 
 // NodePodsMap is a set of (node, pods) pairs
 type NodePodsMap map[*v1.Node][]*v1.Pod
@@ -77,10 +84,35 @@ func validateThresholds(thresholds api.ResourceThresholds) error {
 	return nil
 }
 
-func getNodeUsage(
+func getNodeThresholds(
 	nodes []*v1.Node,
 	lowThreshold, highThreshold api.ResourceThresholds,
 	resourceNames []v1.ResourceName,
+) map[string]NodeThresholds {
+	nodeThresholdsMap := map[string]NodeThresholds{}
+
+	for _, node := range nodes {
+		nodeCapacity := node.Status.Capacity
+		if len(node.Status.Allocatable) > 0 {
+			nodeCapacity = node.Status.Allocatable
+		}
+
+		nodeThresholdsMap[node.Name] = NodeThresholds{
+			lowResourceThreshold:  map[v1.ResourceName]*resource.Quantity{},
+			highResourceThreshold: map[v1.ResourceName]*resource.Quantity{},
+		}
+
+		for _, resourceName := range resourceNames {
+			nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName])
+			nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName])
+		}
+	}
+	return nodeThresholdsMap
+}
+
+func getNodeUsage(
+	nodes []*v1.Node,
+	resourceNames []v1.ResourceName,
 	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
 ) []NodeUsage {
 	var nodeUsageList []NodeUsage
@@ -92,37 +124,10 @@ func getNodeUsage(
 			continue
 		}
 
-		nodeCapacity := node.Status.Capacity
-		if len(node.Status.Allocatable) > 0 {
-			nodeCapacity = node.Status.Allocatable
-		}
-		lowResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resourceThreshold(nodeCapacity, v1.ResourceCPU, lowThreshold[v1.ResourceCPU]),
-			v1.ResourceMemory: resourceThreshold(nodeCapacity, v1.ResourceMemory, lowThreshold[v1.ResourceMemory]),
-			v1.ResourcePods:   resourceThreshold(nodeCapacity, v1.ResourcePods, lowThreshold[v1.ResourcePods]),
-		}
-		for _, name := range resourceNames {
-			if !isBasicResource(name) {
-				lowResourceThreshold[name] = resourceThreshold(nodeCapacity, name, lowThreshold[name])
-			}
-		}
-		highResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resourceThreshold(nodeCapacity, v1.ResourceCPU, highThreshold[v1.ResourceCPU]),
-			v1.ResourceMemory: resourceThreshold(nodeCapacity, v1.ResourceMemory, highThreshold[v1.ResourceMemory]),
-			v1.ResourcePods:   resourceThreshold(nodeCapacity, v1.ResourcePods, highThreshold[v1.ResourcePods]),
-		}
-		for _, name := range resourceNames {
-			if !isBasicResource(name) {
-				highResourceThreshold[name] = resourceThreshold(nodeCapacity, name, highThreshold[name])
-			}
-		}
-
 		nodeUsageList = append(nodeUsageList, NodeUsage{
-			node:                  node,
-			usage:                 nodeUtilization(node, pods, resourceNames),
-			allPods:               pods,
-			lowResourceThreshold:  lowResourceThreshold,
-			highResourceThreshold: highResourceThreshold,
+			node:    node,
+			usage:   nodeUtilization(node, pods, resourceNames),
+			allPods: pods,
 		})
 	}
 
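The thresholds that used to be computed inline now arrive keyed by node name. A hypothetical probe of the new map (the node name "worker-1" and the percentages are made up; assumes the package's identifiers and fmt are in scope):

	nodeThresholds := getNodeThresholds(nodes,
		api.ResourceThresholds{v1.ResourceCPU: 20},
		api.ResourceThresholds{v1.ResourceCPU: 70},
		[]v1.ResourceName{v1.ResourceCPU})
	t := nodeThresholds["worker-1"]
	// On a node with 4 allocatable CPUs this yields 800m and 2800m.
	fmt.Println(t.lowResourceThreshold[v1.ResourceCPU], t.highResourceThreshold[v1.ResourceCPU])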
@@ -171,19 +176,24 @@ func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
 // low and high thresholds, it is simply ignored.
 func classifyNodes(
 	nodeUsages []NodeUsage,
-	lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage) bool,
-) ([]NodeUsage, []NodeUsage) {
-	lowNodes, highNodes := []NodeUsage{}, []NodeUsage{}
+	nodeThresholds map[string]NodeThresholds,
+	lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool,
+) ([]NodeInfo, []NodeInfo) {
+	lowNodes, highNodes := []NodeInfo{}, []NodeInfo{}
 
 	for _, nodeUsage := range nodeUsages {
-		if lowThresholdFilter(nodeUsage.node, nodeUsage) {
-			klog.V(2).InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
-			lowNodes = append(lowNodes, nodeUsage)
-		} else if highThresholdFilter(nodeUsage.node, nodeUsage) {
-			klog.V(2).InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
-			highNodes = append(highNodes, nodeUsage)
+		nodeInfo := NodeInfo{
+			NodeUsage:  nodeUsage,
+			thresholds: nodeThresholds[nodeUsage.node.Name],
+		}
+		if lowThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
+			klog.InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			lowNodes = append(lowNodes, nodeInfo)
+		} else if highThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
+			klog.InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			highNodes = append(highNodes, nodeInfo)
 		} else {
-			klog.V(2).InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
 		}
 	}
 
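Since NodeInfo embeds NodeUsage, all existing field accesses are promoted and keep working on the new type; only the thresholds move behind an explicit field. A sketch (assumes a nodeUsage and the nodeThresholds map are in scope):

	info := NodeInfo{NodeUsage: nodeUsage, thresholds: nodeThresholds[nodeUsage.node.Name]}
	_ = info.node                             // promoted from the embedded NodeUsage
	_ = info.usage[v1.ResourceCPU]            // promoted from the embedded NodeUsage
	_ = info.thresholds.highResourceThreshold // now reached via the thresholds field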
@@ -195,7 +205,7 @@ func classifyNodes(
 // TODO: @ravig Break this function into smaller functions.
 func evictPodsFromSourceNodes(
 	ctx context.Context,
-	sourceNodes, destinationNodes []NodeUsage,
+	sourceNodes, destinationNodes []NodeInfo,
 	podEvictor *evictions.PodEvictor,
 	podFilter func(pod *v1.Pod) bool,
 	resourceNames []v1.ResourceName,
@@ -220,7 +230,7 @@ func evictPodsFromSourceNodes(
 			if _, ok := totalAvailableUsage[name]; !ok {
 				totalAvailableUsage[name] = resource.NewQuantity(0, resource.DecimalSI)
 			}
-			totalAvailableUsage[name].Add(*node.highResourceThreshold[name])
+			totalAvailableUsage[name].Add(*node.thresholds.highResourceThreshold[name])
 			totalAvailableUsage[name].Sub(*node.usage[name])
 		}
 	}
@@ -260,7 +270,7 @@
 func evictPods(
 	ctx context.Context,
 	inputPods []*v1.Pod,
-	nodeUsage NodeUsage,
+	nodeInfo NodeInfo,
 	totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
 	taintsOfLowNodes map[string][]v1.Taint,
 	podEvictor *evictions.PodEvictor,
@@ -268,14 +278,14 @@ func evictPods(
 	continueEviction continueEvictionCond,
 ) {
 
-	if continueEviction(nodeUsage, totalAvailableUsage) {
+	if continueEviction(nodeInfo, totalAvailableUsage) {
 		for _, pod := range inputPods {
 			if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
 				klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
 				continue
 			}
 
-			success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
+			success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node, strategy)
 			if err != nil {
 				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
 				break
@@ -286,20 +296,20 @@
 				for name := range totalAvailableUsage {
 					if name == v1.ResourcePods {
-						nodeUsage.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
+						nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
 						totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
 					} else {
 						quantity := utils.GetResourceRequestQuantity(pod, name)
-						nodeUsage.usage[name].Sub(quantity)
+						nodeInfo.usage[name].Sub(quantity)
 						totalAvailableUsage[name].Sub(quantity)
 					}
 				}
 
 				keysAndValues := []interface{}{
-					"node", nodeUsage.node.Name,
-					"CPU", nodeUsage.usage[v1.ResourceCPU].MilliValue(),
-					"Mem", nodeUsage.usage[v1.ResourceMemory].Value(),
-					"Pods", nodeUsage.usage[v1.ResourcePods].Value(),
+					"node", nodeInfo.node.Name,
+					"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
+					"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
+					"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
 				}
 				for name := range totalAvailableUsage {
 					if !isBasicResource(name) {
-						keysAndValues = append(keysAndValues, string(name), nodeUsage.usage[name].Value())
+						keysAndValues = append(keysAndValues, string(name), nodeInfo.usage[name].Value())
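Each successful eviction is debited twice, once against the node's own usage and once against the shared budget, so both continueEviction checks see up-to-date numbers. In isolation (a sketch with a made-up request size; nodeInfo and totalAvailableUsage as in the hunks above):

	requested := resource.MustParse("500m")
	nodeInfo.usage[v1.ResourceCPU].Sub(requested)      // the source node is now lighter
	totalAvailableUsage[v1.ResourceCPU].Sub(requested) // the remaining headroom shrinks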
@@ -309,7 +319,7 @@
 				klog.V(3).InfoS("Updated node usage", keysAndValues...)
 
 				// check if pods can be still evicted
-				if !continueEviction(nodeUsage, totalAvailableUsage) {
+				if !continueEviction(nodeInfo, totalAvailableUsage) {
 					break
 				}
 			}
@@ -318,7 +328,7 @@
 	}
 
 // sortNodesByUsage sorts nodes based on usage in descending order
-func sortNodesByUsage(nodes []NodeUsage) {
+func sortNodesByUsage(nodes []NodeInfo) {
 	sort.Slice(nodes, func(i, j int) bool {
 		ti := nodes[i].usage[v1.ResourceMemory].Value() + nodes[i].usage[v1.ResourceCPU].MilliValue() + nodes[i].usage[v1.ResourcePods].Value()
 		tj := nodes[j].usage[v1.ResourceMemory].Value() + nodes[j].usage[v1.ResourceCPU].MilliValue() + nodes[j].usage[v1.ResourcePods].Value()
@@ -338,10 +348,10 @@
 
 // isNodeAboveTargetUtilization checks if a node is overutilized
 // At least one resource has to be above the high threshold
-func isNodeAboveTargetUtilization(usage NodeUsage) bool {
+func isNodeAboveTargetUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool {
 	for name, nodeValue := range usage.usage {
 		// usage.highResourceThreshold[name] < nodeValue
-		if usage.highResourceThreshold[name].Cmp(*nodeValue) == -1 {
+		if threshold[name].Cmp(*nodeValue) == -1 {
 			return true
 		}
 	}
@@ -350,10 +360,10 @@
 
 // isNodeWithLowUtilization checks if a node is underutilized
 // All resources have to be below the low threshold
-func isNodeWithLowUtilization(usage NodeUsage) bool {
+func isNodeWithLowUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool {
 	for name, nodeValue := range usage.usage {
 		// usage.lowResourceThreshold[name] < nodeValue
-		if usage.lowResourceThreshold[name].Cmp(*nodeValue) == -1 {
+		if threshold[name].Cmp(*nodeValue) == -1 {
 			return false
 		}
 	}