From 150f945592ac430f646b24a2298fa5349e83e6b7 Mon Sep 17 00:00:00 2001
From: Jan Chaloupka
Date: Tue, 14 Apr 2020 11:29:01 +0200
Subject: [PATCH] lownodeutilization: classify pods of over-utilized nodes only

Only over-utilized nodes need their pods classified into categories.
Thus, pod categorization is skipped for all other nodes, which saves
computation time whenever over-utilized nodes make up only a fraction
(less than 50%) of all nodes.
---
 .../strategies/lownodeutilization.go      | 136 +++++++++++-------
 .../strategies/lownodeutilization_test.go |   4 +-
 2 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go
index bc577723c..a49253a4b 100644
--- a/pkg/descheduler/strategies/lownodeutilization.go
+++ b/pkg/descheduler/strategies/lownodeutilization.go
@@ -33,13 +33,9 @@ import (
 )
 
 type NodeUsageMap struct {
-	node             *v1.Node
-	usage            api.ResourceThresholds
-	allPods          []*v1.Pod
-	nonRemovablePods []*v1.Pod
-	bePods           []*v1.Pod
-	bPods            []*v1.Pod
-	gPods            []*v1.Pod
+	node    *v1.Node
+	usage   api.ResourceThresholds
+	allPods []*v1.Pod
 }
 
 type NodePodsMap map[*v1.Node][]*v1.Pod
@@ -91,7 +87,16 @@ func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerS
 		targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
 	klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
 
-	totalPodsEvicted := evictPodsFromTargetNodes(ds.Client, evictionPolicyGroupVersion, targetNodes, lowNodes, targetThresholds, ds.DryRun, ds.MaxNoOfPodsToEvictPerNode, nodepodCount)
+	totalPodsEvicted := evictPodsFromTargetNodes(
+		ds.Client,
+		evictionPolicyGroupVersion,
+		targetNodes,
+		lowNodes,
+		targetThresholds,
+		ds.DryRun,
+		ds.MaxNoOfPodsToEvictPerNode,
+		ds.EvictLocalStoragePods,
+		nodepodCount)
 	klog.V(1).Infof("Total number of pods evicted: %v", totalPodsEvicted)
 }
 
@@ -134,9 +139,12 @@ func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
 func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds, evictLocalStoragePods bool) ([]NodeUsageMap, []NodeUsageMap) {
 	lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{}
 	for node, pods := range npm {
-		usage, allPods, nonRemovablePods, bePods, bPods, gPods := NodeUtilization(node, pods, evictLocalStoragePods)
-		nuMap := NodeUsageMap{node, usage, allPods, nonRemovablePods, bePods, bPods, gPods}
-
+		usage := nodeUtilization(node, pods, evictLocalStoragePods)
+		nuMap := NodeUsageMap{
+			node:    node,
+			usage:   usage,
+			allPods: pods,
+		}
 		// Check if node is underutilized and if we can schedule pods on it.
 		if !nodeutil.IsNodeUnschedulable(node) && IsNodeWithLowUtilization(usage, thresholds) {
 			klog.V(2).Infof("Node %#v is under utilized with usage: %#v", node.Name, usage)
@@ -147,7 +155,6 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
 		} else {
 			klog.V(2).Infof("Node %#v is appropriately utilized with usage: %#v", node.Name, usage)
 		}
-		klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bePods:%v, bPods:%v, gPods:%v", len(allPods), len(nonRemovablePods), len(bePods), len(bPods), len(gPods))
 	}
 	return lowNodes, targetNodes
 }
@@ -155,7 +162,17 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
 // evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
 // evicts them based on QoS as fallback option.
 // TODO: @ravig Break this function into smaller functions.
-func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount utils.NodePodEvictedCount) int {
+func evictPodsFromTargetNodes(
+	client clientset.Interface,
+	evictionPolicyGroupVersion string,
+	targetNodes,
+	lowNodes []NodeUsageMap,
+	targetThresholds api.ResourceThresholds,
+	dryRun bool,
+	maxPodsToEvict int,
+	evictLocalStoragePods bool,
+	nodepodCount utils.NodePodEvictedCount,
+) int {
 	podsEvicted := 0
 
 	SortNodesByUsage(targetNodes)
@@ -197,11 +214,14 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
 		klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
 
 		currentPodsEvicted := nodepodCount[node.node]
+		nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, evictLocalStoragePods)
+		klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bestEffortPods:%v, burstablePods:%v, guaranteedPods:%v", len(node.allPods), len(nonRemovablePods), len(bestEffortPods), len(burstablePods), len(guaranteedPods))
+
 		// Check if one pod has priority, if yes, assume that all pods have priority and evict pods based on priority.
 		if node.allPods[0].Spec.Priority != nil {
 			klog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
 			evictablePods := make([]*v1.Pod, 0)
-			evictablePods = append(append(node.bPods, node.bePods...), node.gPods...)
+			evictablePods = append(append(burstablePods, bestEffortPods...), guaranteedPods...)
 
 			// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
 			sortPodsBasedOnPriority(evictablePods)
@@ -210,13 +230,13 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
 			// TODO: Remove this when we support only priority.
 			// Falling back to evicting pods based on priority.
 			klog.V(1).Infof("Evicting pods based on QoS")
-			klog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
+			klog.V(1).Infof("There are %v non-evictable pods on the node", len(nonRemovablePods))
 			// evict best effort pods
-			evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
+			evictPods(bestEffortPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
 			// evict burstable pods
-			evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
+			evictPods(burstablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
 			// evict guaranteed pods
-			evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
+			evictPods(guaranteedPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
 		}
 		nodepodCount[node.node] = currentPodsEvicted
 		podsEvicted = podsEvicted + nodepodCount[node.node]
@@ -361,38 +381,18 @@ func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds
 	return true
 }
 
-// NodeUtilization returns the current usage of node.
-func NodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) (api.ResourceThresholds, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
-	bePods := []*v1.Pod{}
-	nonRemovablePods := []*v1.Pod{}
-	bPods := []*v1.Pod{}
-	gPods := []*v1.Pod{}
-	totalReqs := map[v1.ResourceName]resource.Quantity{}
+func nodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) api.ResourceThresholds {
+	totalReqs := map[v1.ResourceName]*resource.Quantity{
+		v1.ResourceCPU:    {},
+		v1.ResourceMemory: {},
+	}
 	for _, pod := range pods {
-		// We need to compute the usage of nonRemovablePods unless it is a best effort pod. So, cannot use podutil.ListEvictablePodsOnNode
-		if !podutil.IsEvictable(pod, evictLocalStoragePods) {
-			nonRemovablePods = append(nonRemovablePods, pod)
-			if podutil.IsBestEffortPod(pod) {
-				continue
-			}
-		} else if podutil.IsBestEffortPod(pod) {
-			bePods = append(bePods, pod)
-			continue
-		} else if podutil.IsBurstablePod(pod) {
-			bPods = append(bPods, pod)
-		} else {
-			gPods = append(gPods, pod)
-		}
-
 		req, _ := utils.PodRequestsAndLimits(pod)
 		for name, quantity := range req {
 			if name == v1.ResourceCPU || name == v1.ResourceMemory {
-				if value, ok := totalReqs[name]; !ok {
-					totalReqs[name] = quantity.DeepCopy()
-				} else {
-					value.Add(quantity)
-					totalReqs[name] = value
-				}
+				// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
+				// the format of the quantity will be updated to the format of y.
+				totalReqs[name].Add(quantity)
 			}
 		}
 	}
@@ -402,12 +402,42 @@ func NodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool)
 		nodeCapacity = node.Status.Allocatable
 	}
 
-	usage := api.ResourceThresholds{}
-	totalCPUReq := totalReqs[v1.ResourceCPU]
-	totalMemReq := totalReqs[v1.ResourceMemory]
 	totalPods := len(pods)
-	usage[v1.ResourceCPU] = api.Percentage((float64(totalCPUReq.MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue()))
-	usage[v1.ResourceMemory] = api.Percentage(float64(totalMemReq.Value()) / float64(nodeCapacity.Memory().Value()) * 100)
-	usage[v1.ResourcePods] = api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value()))
-	return usage, pods, nonRemovablePods, bePods, bPods, gPods
+	return api.ResourceThresholds{
+		v1.ResourceCPU:    api.Percentage((float64(totalReqs[v1.ResourceCPU].MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue())),
+		v1.ResourceMemory: api.Percentage(float64(totalReqs[v1.ResourceMemory].Value()) / float64(nodeCapacity.Memory().Value()) * 100),
+		v1.ResourcePods:   api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value())),
+	}
+}
+
+func classifyPods(pods []*v1.Pod, evictLocalStoragePods bool) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
+	var nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods []*v1.Pod
+
+	// From https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/
+	//
+	// For a Pod to be given a QoS class of Guaranteed:
+	// - every Container in the Pod must have a memory limit and a memory request, and they must be the same.
+	// - every Container in the Pod must have a CPU limit and a CPU request, and they must be the same.
+	// A Pod is given a QoS class of Burstable if:
+	// - the Pod does not meet the criteria for QoS class Guaranteed.
+	// - at least one Container in the Pod has a memory or CPU request.
+	// For a Pod to be given a QoS class of BestEffort, the Containers in the Pod must not have any memory or CPU limits or requests.
+
+	for _, pod := range pods {
+		if !podutil.IsEvictable(pod, evictLocalStoragePods) {
+			nonRemovablePods = append(nonRemovablePods, pod)
+			continue
+		}
+
+		switch utils.GetPodQOS(pod) {
+		case v1.PodQOSGuaranteed:
+			guaranteedPods = append(guaranteedPods, pod)
+		case v1.PodQOSBurstable:
+			burstablePods = append(burstablePods, pod)
+		default: // alias v1.PodQOSBestEffort
+			bestEffortPods = append(bestEffortPods, pod)
+		}
+	}
+
+	return nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods
 }
diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go
index eb90065ab..0bdcda76c 100644
--- a/pkg/descheduler/strategies/lownodeutilization_test.go
+++ b/pkg/descheduler/strategies/lownodeutilization_test.go
@@ -129,7 +129,7 @@ func TestLowNodeUtilizationWithoutPriority(t *testing.T) {
 	npe[n1] = 0
 	npe[n2] = 0
 	npe[n3] = 0
-	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, npe)
+	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, false, npe)
 	if expectedPodsEvicted != podsEvicted {
 		t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
 	}
@@ -235,7 +235,7 @@ func TestLowNodeUtilizationWithPriorities(t *testing.T) {
 	npe[n1] = 0
 	npe[n2] = 0
 	npe[n3] = 0
-	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, npe)
+	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, false, npe)
 	if expectedPodsEvicted != podsEvicted {
 		t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
 	}
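
Note (not part of the patch above): a minimal, self-contained Go sketch of the request accumulation and percentage math that the reworked nodeUtilization performs with *resource.Quantity values. The per-pod requests and node capacity below are made-up example figures; only k8s.io/apimachinery is assumed.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Made-up per-pod CPU requests on a single node.
	podCPURequests := []resource.Quantity{
		resource.MustParse("500m"),
		resource.MustParse("250m"),
		resource.MustParse("1"),
	}

	// Start from a zero Quantity, as the map literal in nodeUtilization does,
	// and accumulate with Add; per the Quantity.Add documentation quoted in
	// the patch, a zero value adopts the format of the quantity added to it.
	totalCPU := &resource.Quantity{}
	for _, req := range podCPURequests {
		totalCPU.Add(req)
	}

	// Made-up allocatable CPU capacity of the node (4 cores).
	nodeCPUCapacity := resource.MustParse("4")

	// Same shape as the patch's CPU term:
	// float64(total.MilliValue()) * 100 / float64(capacity.MilliValue())
	usage := float64(totalCPU.MilliValue()) * 100 / float64(nodeCPUCapacity.MilliValue())
	fmt.Printf("requested %v of %v CPU (%.1f%%)\n", totalCPU, &nodeCPUCapacity, usage)
}

Seeding the map with zero-valued *resource.Quantity entries and calling Add directly is what lets the patch drop the earlier ok-check/DeepCopy branch when summing requests.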