From 81b816d4a4ff6417009cd7bd384d0da87b802b44 Mon Sep 17 00:00:00 2001 From: ZongqiangZhang Date: Fri, 2 Apr 2021 21:37:51 +0800 Subject: [PATCH] support extended resources in lownodeutilization --- README.md | 10 +- .../strategies/lownodeutilization.go | 203 +++++++++----- .../strategies/lownodeutilization_test.go | 253 +++++++++++++++--- test/test_utils.go | 11 + 4 files changed, 380 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index 235e73881..f08837a78 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ in the hope that recreation of evicted pods will be scheduled on these underutil parameters of this strategy are configured under `nodeResourceUtilizationThresholds`. The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold -`thresholds` can be configured for cpu, memory, and number of pods in terms of percentage (the percentage is +`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage (the percentage is calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity). For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node). @@ -181,7 +181,7 @@ If a node's usage is below threshold for all (cpu, memory, and number of pods), Currently, pods request resource requirements are considered for computing node resource utilization. There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes -from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, or number of pods), +from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, number of pods, or extended resources), the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`, can be configured for cpu, memory, and number of pods too in terms of percentage. @@ -221,14 +221,12 @@ strategies: ``` Policy should pass the following validation checks: -* Only three types of resources are supported: `cpu`, `memory` and `pods`. +* Three basic native resource types are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized. +* Extended resources are supported. For example, the resource type `nvidia.com/gpu` can be specified for GPU node utilization. Extended resources are optional and are not used to compute a node's usage unless they are specified explicitly in both `thresholds` and `targetThresholds`. * `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources. * The valid range of the resource's percentage value is \[0, 100\] * Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource. -If any of the resource types is not specified, all its thresholds default to 100% to avoid nodes going -from underutilized to overutilized. - There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`. This parameter can be configured to activate the strategy only when the number of under utilized nodes are above the configured value.
This could be helpful in large clusters where a few nodes could go diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go index 85aa3f696..27614f7db 100644 --- a/pkg/descheduler/strategies/lownodeutilization.go +++ b/pkg/descheduler/strategies/lownodeutilization.go @@ -97,9 +97,10 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg thresholds[v1.ResourceMemory] = MaxResourcePercentage targetThresholds[v1.ResourceMemory] = MaxResourcePercentage } + resourceNames := getResourceNames(thresholds) lowNodes, targetNodes := classifyNodes( - getNodeUsage(ctx, client, nodes, thresholds, targetThresholds), + getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames), // The node has to be schedulable (to be able to move workload there) func(node *v1.Node, usage NodeUsage) bool { if nodeutil.IsNodeUnschedulable(node) { @@ -113,26 +114,54 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg }, ) - klog.V(1).InfoS("Criteria for a node under utilization", - "CPU", thresholds[v1.ResourceCPU], "Mem", thresholds[v1.ResourceMemory], "Pods", thresholds[v1.ResourcePods]) - klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes)) - klog.V(1).InfoS("Criteria for a node above target utilization", - "CPU", targetThresholds[v1.ResourceCPU], "Mem", targetThresholds[v1.ResourceMemory], "Pods", targetThresholds[v1.ResourcePods]) - klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes)) + // log message in one line + keysAndValues := []interface{}{ + "CPU", int64(float64(thresholds[v1.ResourceCPU])), + "Mem", int64(float64(thresholds[v1.ResourceMemory])), + "Pods", int64(float64(thresholds[v1.ResourcePods])), + } + for name := range thresholds { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), int64(float64(thresholds[name]))) + } + } + klog.V(1).InfoS("Criteria for a node under utilization", keysAndValues...) if len(lowNodes) == 0 { klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further") return } + klog.V(1).InfoS("Total number of underutilized nodes", "totalNumber", len(lowNodes)) + if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes { klog.V(1).InfoS("Number of nodes underutilized is less than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes) return } - if len(targetNodes) == 0 { - klog.V(1).InfoS("No node is overutilized, nothing to do here, you might tune your thresholds further") + + if len(lowNodes) == len(nodes) { + klog.V(1).InfoS("All nodes are underutilized, nothing to do here") return } + if len(targetNodes) == 0 { + klog.V(1).InfoS("All nodes are under target utilization, nothing to do here") + return + } + + // log message in one line + keysAndValues = []interface{}{ + "CPU", int64(float64(targetThresholds[v1.ResourceCPU])), + "Mem", int64(float64(targetThresholds[v1.ResourceMemory])), + "Pods", int64(float64(targetThresholds[v1.ResourcePods])), + } + for name := range targetThresholds { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), int64(float64(targetThresholds[name]))) + } + } + klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...) 
+ + klog.V(1).InfoS("Number of nodes above target utilization", "totalNumber", len(targetNodes)) evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority)) evictPodsFromTargetNodes( @@ -140,7 +169,10 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg targetNodes, lowNodes, podEvictor, - evictable.IsEvictable) + evictable.IsEvictable, + resourceNames) + + klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted()) } // validateStrategyConfig checks if the strategy's config is valid @@ -173,13 +205,8 @@ func validateThresholds(thresholds api.ResourceThresholds) error { return fmt.Errorf("no resource threshold is configured") } for name, percent := range thresholds { - switch name { - case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods: - if percent < MinResourcePercentage || percent > MaxResourcePercentage { - return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage) - } - default: - return fmt.Errorf("only cpu, memory, or pods thresholds can be specified") + if percent < MinResourcePercentage || percent > MaxResourcePercentage { + return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage) } } return nil @@ -190,6 +217,7 @@ func getNodeUsage( client clientset.Interface, nodes []*v1.Node, lowThreshold, highThreshold api.ResourceThresholds, + resourceNames []v1.ResourceName, ) []NodeUsage { nodeUsageList := []NodeUsage{} @@ -200,28 +228,42 @@ func getNodeUsage( continue } + // A threshold is in percentages but in <0;100> interval. + // Performing `threshold * 0.01` will convert <0;100> interval into <0;1>. + // Multiplying it with capacity will give fraction of the capacity corresponding to the given high/low resource threshold in Quantity units. 
nodeCapacity := node.Status.Capacity if len(node.Status.Allocatable) > 0 { nodeCapacity = node.Status.Allocatable } + lowResourceThreshold := map[v1.ResourceName]*resource.Quantity{ + v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(lowThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI), + v1.ResourceMemory: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI), + v1.ResourcePods: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI), + } + for _, name := range resourceNames { + if !isBasicResource(name) { + cap := nodeCapacity[name] + lowResourceThreshold[name] = resource.NewQuantity(int64(float64(lowThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI) + } + } + highResourceThreshold := map[v1.ResourceName]*resource.Quantity{ + v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(highThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI), + v1.ResourceMemory: resource.NewQuantity(int64(float64(highThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI), + v1.ResourcePods: resource.NewQuantity(int64(float64(highThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI), + } + for _, name := range resourceNames { + if !isBasicResource(name) { + cap := nodeCapacity[name] + highResourceThreshold[name] = resource.NewQuantity(int64(float64(highThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI) + } + } nodeUsageList = append(nodeUsageList, NodeUsage{ - node: node, - usage: nodeUtilization(node, pods), - allPods: pods, - // A threshold is in percentages but in <0;100> interval. - // Performing `threshold * 0.01` will convert <0;100> interval into <0;1>. - // Multiplying it with capacity will give fraction of the capacity corresponding to the given high/low resource threshold in Quantity units. 
- lowResourceThreshold: map[v1.ResourceName]*resource.Quantity{ - v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(lowThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI), - v1.ResourceMemory: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI), - v1.ResourcePods: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI), - }, - highResourceThreshold: map[v1.ResourceName]*resource.Quantity{ - v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(highThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI), - v1.ResourceMemory: resource.NewQuantity(int64(float64(highThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI), - v1.ResourcePods: resource.NewQuantity(int64(float64(highThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI), - }, + node: node, + usage: nodeUtilization(node, pods, resourceNames), + allPods: pods, + lowResourceThreshold: lowResourceThreshold, + highResourceThreshold: highResourceThreshold, }) } @@ -276,11 +318,12 @@ func evictPodsFromTargetNodes( targetNodes, lowNodes []NodeUsage, podEvictor *evictions.PodEvictor, podFilter func(pod *v1.Pod) bool, + resourceNames []v1.ResourceName, ) { sortNodesByUsage(targetNodes) - // upper bound on total number of pods/cpu/memory to be moved + // upper bound on total number of pods/cpu/memory and optional extended resources to be moved totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{ v1.ResourcePods: {}, v1.ResourceCPU: {}, @@ -291,18 +334,27 @@ func evictPodsFromTargetNodes( for _, node := range lowNodes { taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints - for name := range totalAvailableUsage { + for name := range node.highResourceThreshold { + if _, ok := totalAvailableUsage[name]; !ok { + totalAvailableUsage[name] = resource.NewQuantity(0, resource.DecimalSI) + } totalAvailableUsage[name].Add(*node.highResourceThreshold[name]) totalAvailableUsage[name].Sub(*node.usage[name]) } } - klog.V(1).InfoS( - "Total capacity to be moved", "CPU", totalAvailableUsage[v1.ResourceCPU].MilliValue(), "Mem", totalAvailableUsage[v1.ResourceMemory].Value(), "Pods", totalAvailableUsage[v1.ResourcePods].Value(), - ) + // log message in one line + keysAndValues := []interface{}{ "CPU", totalAvailableUsage[v1.ResourceCPU].MilliValue(), "Mem", totalAvailableUsage[v1.ResourceMemory].Value(), "Pods", totalAvailableUsage[v1.ResourcePods].Value(), + } + for name := range totalAvailableUsage { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) + } + } + klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)
for _, node := range targetNodes { klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage) @@ -336,15 +388,12 @@ func evictPods( if !isNodeAboveTargetUtilization(nodeUsage) { return false } - if totalAvailableUsage[v1.ResourcePods].CmpInt64(0) < 1 { - return false - } - if totalAvailableUsage[v1.ResourceCPU].CmpInt64(0) < 1 { - return false - } - if totalAvailableUsage[v1.ResourceMemory].CmpInt64(0) < 1 { - return false + for name := range totalAvailableUsage { + if totalAvailableUsage[name].CmpInt64(0) < 1 { + return false + } } + return true } @@ -352,7 +401,6 @@ func evictPods( for _, pod := range inputPods { if !utils.PodToleratesTaints(pod, taintsOfLowNodes) { klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod)) - continue } @@ -365,16 +413,16 @@ func evictPods( if success { klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod), "err", err) - cpuQuantity := utils.GetResourceRequestQuantity(pod, v1.ResourceCPU) - nodeUsage.usage[v1.ResourceCPU].Sub(cpuQuantity) - totalAvailableUsage[v1.ResourceCPU].Sub(cpuQuantity) - - memoryQuantity := utils.GetResourceRequestQuantity(pod, v1.ResourceMemory) - nodeUsage.usage[v1.ResourceMemory].Sub(memoryQuantity) - totalAvailableUsage[v1.ResourceMemory].Sub(memoryQuantity) - - nodeUsage.usage[v1.ResourcePods].Sub(*resource.NewQuantity(1, resource.DecimalSI)) - totalAvailableUsage[v1.ResourcePods].Sub(*resource.NewQuantity(1, resource.DecimalSI)) + for name := range totalAvailableUsage { + if name == v1.ResourcePods { + nodeUsage.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI)) + totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI)) + } else { + quantity := utils.GetResourceRequestQuantity(pod, name) + nodeUsage.usage[name].Sub(quantity) + totalAvailableUsage[name].Sub(quantity) + } + } klog.V(3).InfoS("Updated node usage", "updatedUsage", nodeUsage) // check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved @@ -391,6 +439,15 @@ func sortNodesByUsage(nodes []NodeUsage) { sort.Slice(nodes, func(i, j int) bool { ti := nodes[i].usage[v1.ResourceMemory].Value() + nodes[i].usage[v1.ResourceCPU].MilliValue() + nodes[i].usage[v1.ResourcePods].Value() tj := nodes[j].usage[v1.ResourceMemory].Value() + nodes[j].usage[v1.ResourceCPU].MilliValue() + nodes[j].usage[v1.ResourcePods].Value() + + // extended resources + for name := range nodes[i].usage { + if !isBasicResource(name) { + ti = ti + nodes[i].usage[name].Value() + tj = tj + nodes[j].usage[name].Value() + } + } + // To return sorted in descending order return ti > tj }) @@ -421,16 +478,42 @@ func isNodeWithLowUtilization(usage NodeUsage) bool { return true } -func nodeUtilization(node *v1.Node, pods []*v1.Pod) map[v1.ResourceName]*resource.Quantity { +// getResourceNames returns a list of the resource names configured in the given resource thresholds +func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName { + resourceNames := make([]v1.ResourceName, 0, len(thresholds)) + for name := range thresholds { + resourceNames = append(resourceNames, name) + } + return resourceNames +} + +// isBasicResource checks whether the resource is one of the basic native resources (cpu, memory, or pods).
+func isBasicResource(name v1.ResourceName) bool { + switch name { + case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods: + return true + default: + return false + } +} + +func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity { totalReqs := map[v1.ResourceName]*resource.Quantity{ v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI), v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI), v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI), } + for _, name := range resourceNames { + if !isBasicResource(name) { + totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI) + } + } + for _, pod := range pods { req, _ := utils.PodRequestsAndLimits(pod) - for name, quantity := range req { - if name == v1.ResourceCPU || name == v1.ResourceMemory { + for _, name := range resourceNames { + quantity, ok := req[name] + if ok && name != v1.ResourcePods { // As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero, // the format of the quantity will be updated to the format of y. totalReqs[name].Add(quantity) diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go index a7a30918f..4f1fb3b6c 100644 --- a/pkg/descheduler/strategies/lownodeutilization_test.go +++ b/pkg/descheduler/strategies/lownodeutilization_test.go @@ -36,8 +36,9 @@ import ( ) var ( - lowPriority = int32(0) - highPriority = int32(10000) + lowPriority = int32(0) + highPriority = int32(10000) + extendedResource = v1.ResourceName("example.com/foo") ) func TestLowNodeUtilization(t *testing.T) { @@ -104,7 +105,7 @@ func TestLowNodeUtilization(t *testing.T) { }, n2NodeName: { Items: []v1.Pod{ - *test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef), + *test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), }, }, n3NodeName: {}, @@ -163,7 +164,7 @@ func TestLowNodeUtilization(t *testing.T) { }, n2NodeName: { Items: []v1.Pod{ - *test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef), + *test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), }, }, n3NodeName: {}, @@ -301,7 +302,7 @@ func TestLowNodeUtilization(t *testing.T) { }, n2NodeName: { Items: []v1.Pod{ - *test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef), + *test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), }, }, n3NodeName: {}, @@ -377,7 +378,7 @@ func TestLowNodeUtilization(t *testing.T) { }, n2NodeName: { Items: []v1.Pod{ - *test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef), + *test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), }, }, n3NodeName: {}, @@ -386,6 +387,132 @@ func TestLowNodeUtilization(t *testing.T) { expectedPodsEvicted: 4, evictedPods: []string{"p1", "p2", "p4", "p5"}, }, + { + name: "with extended resource", + thresholds: api.ResourceThresholds{ + v1.ResourcePods: 30, + extendedResource: 30, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourcePods: 50, + extendedResource: 50, + }, + nodes: map[string]*v1.Node{ + n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, func(node *v1.Node) { + test.SetNodeExtendedResource(node, extendedResource, 8) + }), + n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) { + test.SetNodeExtendedResource(node, extendedResource, 8) + }), + n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: map[string]*v1.PodList{ + n1NodeName: 
{ + Items: []v1.Pod{ + *test.BuildTestPod("p1", 0, 0, n1NodeName, func(pod *v1.Pod) { + // A pod with extended resource. + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + *test.BuildTestPod("p2", 0, 0, n1NodeName, func(pod *v1.Pod) { + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + *test.BuildTestPod("p3", 0, 0, n1NodeName, func(pod *v1.Pod) { + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + *test.BuildTestPod("p4", 0, 0, n1NodeName, func(pod *v1.Pod) { + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + *test.BuildTestPod("p5", 0, 0, n1NodeName, func(pod *v1.Pod) { + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + *test.BuildTestPod("p6", 0, 0, n1NodeName, func(pod *v1.Pod) { + test.SetNormalOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + + *test.BuildTestPod("p7", 0, 0, n1NodeName, func(pod *v1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + *test.BuildTestPod("p8", 0, 0, n1NodeName, func(pod *v1.Pod) { + // A Critical Pod. + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + }, + }, + n2NodeName: { + Items: []v1.Pod{ + *test.BuildTestPod("p9", 0, 0, n2NodeName, func(pod *v1.Pod) { + test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + }, + }, + n3NodeName: {}, + }, + maxPodsToEvictPerNode: 0, + // 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before extended resource is depleted + expectedPodsEvicted: 3, + }, + { + name: "with extended resource in some of nodes", + thresholds: api.ResourceThresholds{ + v1.ResourcePods: 30, + extendedResource: 30, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourcePods: 50, + extendedResource: 50, + }, + nodes: map[string]*v1.Node{ + n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, func(node *v1.Node) { + test.SetNodeExtendedResource(node, extendedResource, 8) + }), + n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: map[string]*v1.PodList{ + n1NodeName: { + Items: []v1.Pod{ + *test.BuildTestPod("p1", 0, 0, n1NodeName, func(pod *v1.Pod) { + // A pod with extended resource. 
+ test.SetRSOwnerRef(pod) + test.SetPodExtendedResourceRequest(pod, extendedResource, 1) + }), + }, + }, + n2NodeName: { + Items: []v1.Pod{ + *test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef), + }, + }, + n3NodeName: {}, + }, + maxPodsToEvictPerNode: 0, + // 0 pods available for eviction because there's not enough extended resource on node2 + expectedPodsEvicted: 0, + }, } for _, test := range testCases { @@ -491,19 +618,6 @@ func TestValidateStrategyConfig(t *testing.T) { errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf( "%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)), }, - { - name: "passing invalid targetThresholds", - thresholds: api.ResourceThresholds{ - v1.ResourceCPU: 20, - v1.ResourceMemory: 20, - }, - targetThresholds: api.ResourceThresholds{ - v1.ResourceCPU: 80, - "resourceInvalid": 80, - }, - errInfo: fmt.Errorf("targetThresholds config is not valid: %v", - fmt.Errorf("only cpu, memory, or pods thresholds can be specified")), - }, { name: "thresholds and targetThresholds configured different num of resources", thresholds: api.ResourceThresholds{ @@ -541,6 +655,60 @@ func TestValidateStrategyConfig(t *testing.T) { }, errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU), }, + { + name: "only thresholds configured extended resource", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 20, + extendedResource: 20, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 80, + v1.ResourceMemory: 80, + }, + errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"), + }, + { + name: "only targetThresholds configured extended resource", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 20, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 80, + v1.ResourceMemory: 80, + extendedResource: 80, + }, + errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"), + }, + { + name: "thresholds and targetThresholds configured different extended resources", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 20, + extendedResource: 20, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 80, + v1.ResourceMemory: 80, + "example.com/bar": 80, + }, + errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"), + }, + { + name: "thresholds' extended resource config value is greater than targetThresholds'", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 20, + extendedResource: 90, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 80, + v1.ResourceMemory: 80, + extendedResource: 20, + }, + errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", extendedResource), + }, { name: "passing valid strategy config", thresholds: api.ResourceThresholds{ @@ -553,6 +721,20 @@ func TestValidateStrategyConfig(t *testing.T) { }, errInfo: nil, }, + { + name: "passing valid strategy config with extended resource", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 20, + extendedResource: 20, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 80, + v1.ResourceMemory: 80, + extendedResource: 80, + }, + errInfo: nil, + }, } for _, testCase := range tests { @@ -587,20 +769,12 @@ func TestValidateThresholds(t *testing.T) { errInfo: fmt.Errorf("no resource threshold
is configured"), }, { - name: "passing unsupported resource name", + name: "passing extended resource name other than cpu/memory/pods", input: api.ResourceThresholds{ - v1.ResourceCPU: 40, - v1.ResourceStorage: 25.5, + v1.ResourceCPU: 40, + extendedResource: 50, }, - errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"), - }, - { - name: "passing invalid resource name", - input: api.ResourceThresholds{ - v1.ResourceCPU: 40, - "coolResource": 42.0, - }, - errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"), + errInfo: nil, }, { name: "passing invalid resource value", @@ -634,6 +808,23 @@ func TestValidateThresholds(t *testing.T) { }, errInfo: nil, }, + { + name: "passing a valid threshold with only extended resource", + input: api.ResourceThresholds{ + extendedResource: 80, + }, + errInfo: nil, + }, + { + name: "passing a valid threshold with cpu, memory, pods and extended resource", + input: api.ResourceThresholds{ + v1.ResourceCPU: 20, + v1.ResourceMemory: 30, + v1.ResourcePods: 40, + extendedResource: 50, + }, + errInfo: nil, + }, } for _, test := range tests { diff --git a/test/test_utils.go b/test/test_utils.go index 7b35ab8e0..358a443cf 100644 --- a/test/test_utils.go +++ b/test/test_utils.go @@ -173,3 +173,14 @@ func SetPodPriority(pod *v1.Pod, priority int32) { func SetNodeUnschedulable(node *v1.Node) { node.Spec.Unschedulable = true } + +// SetPodExtendedResourceRequest sets the given pod's extended resources +func SetPodExtendedResourceRequest(pod *v1.Pod, resourceName v1.ResourceName, requestQuantity int64) { + pod.Spec.Containers[0].Resources.Requests[resourceName] = *resource.NewQuantity(requestQuantity, resource.DecimalSI) +} + +// SetNodeExtendedResouces sets the given node's extended resources +func SetNodeExtendedResource(node *v1.Node, resourceName v1.ResourceName, requestQuantity int64) { + node.Status.Capacity[resourceName] = *resource.NewQuantity(requestQuantity, resource.DecimalSI) + node.Status.Allocatable[resourceName] = *resource.NewQuantity(requestQuantity, resource.DecimalSI) +}