/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package nodeutilization

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"

	"sigs.k8s.io/descheduler/pkg/api"
	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/pkg/utils"
)

// LowNodeUtilization evicts pods from overutilized nodes to underutilized
// nodes. Note that node utilization is calculated from the pods' CPU/memory
// requests, not from actual resource usage.
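//
// For illustration only, a policy enabling this strategy might look roughly
// like the following sketch (assuming the v1alpha1 DeschedulerPolicy format;
// the threshold values are arbitrary and field names should be checked against
// the deployed descheduler version):
//
//	apiVersion: "descheduler/v1alpha1"
//	kind: "DeschedulerPolicy"
//	strategies:
//	  "LowNodeUtilization":
//	    enabled: true
//	    params:
//	      nodeResourceUtilizationThresholds:
//	        thresholds:
//	          "cpu": 20
//	          "memory": 20
//	          "pods": 20
//	        targetThresholds:
//	          "cpu": 50
//	          "memory": 50
//	          "pods": 50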
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
	// TODO: Maybe create a struct for the strategy as well, so that we don't have to pass along all the params?
	if err := validateNodeUtilizationParams(strategy.Params); err != nil {
		klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
		return
	}
	thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
	if err != nil {
		klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
		return
	}

	nodeFit := false
	if strategy.Params != nil {
		nodeFit = strategy.Params.NodeFit
	}
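	// When UseDeviationThresholds is set, the thresholds below are interpreted
	// as percentage deviations from the mean utilization across all nodes
	// rather than as absolute percentages of each node's capacity.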
	useDeviationThresholds := strategy.Params.NodeResourceUtilizationThresholds.UseDeviationThresholds
	thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
	if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds, useDeviationThresholds); err != nil {
		klog.ErrorS(err, "LowNodeUtilization config is not valid")
		return
	}

	// Check if thresholds for Pods/CPU/Memory are set; if not, default them to
	// 100% in the percentage mode and to a 0% deviation (i.e. the mean) in the
	// deviation mode.
	for _, name := range []v1.ResourceName{v1.ResourcePods, v1.ResourceCPU, v1.ResourceMemory} {
		if _, ok := thresholds[name]; !ok {
			if useDeviationThresholds {
				thresholds[name] = MinResourcePercentage
				targetThresholds[name] = MinResourcePercentage
			} else {
				thresholds[name] = MaxResourcePercentage
				targetThresholds[name] = MaxResourcePercentage
			}
		}
	}
	resourceNames := getResourceNames(thresholds)

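	// Split nodes into underutilized (lowNodes) and overutilized (sourceNodes)
	// groups; nodes falling between the two thresholds belong to neither group
	// and are left untouched.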
	lowNodes, sourceNodes := classifyNodes(
		getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, useDeviationThresholds),
		// The node has to be schedulable (to be able to move workload there)
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			if nodeutil.IsNodeUnschedulable(node) {
				klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
				return false
			}
			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
		},
		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
			return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
		},
	)

	// log message for nodes with low utilization
	underutilizationCriteria := []interface{}{
		"CPU", thresholds[v1.ResourceCPU],
		"Mem", thresholds[v1.ResourceMemory],
		"Pods", thresholds[v1.ResourcePods],
	}
	for name := range thresholds {
		if !nodeutil.IsBasicResource(name) {
			underutilizationCriteria = append(underutilizationCriteria, string(name), int64(thresholds[name]))
		}
	}
	klog.V(1).InfoS("Criteria for a node under utilization", underutilizationCriteria...)
	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

	// log message for overutilized nodes
	overutilizationCriteria := []interface{}{
		"CPU", targetThresholds[v1.ResourceCPU],
		"Mem", targetThresholds[v1.ResourceMemory],
		"Pods", targetThresholds[v1.ResourcePods],
	}
	for name := range targetThresholds {
		if !nodeutil.IsBasicResource(name) {
			overutilizationCriteria = append(overutilizationCriteria, string(name), int64(targetThresholds[name]))
		}
	}
	klog.V(1).InfoS("Criteria for a node above target utilization", overutilizationCriteria...)
	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))

	if len(lowNodes) == 0 {
		klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
		return
	}

	if len(lowNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
		klog.V(1).InfoS("Number of underutilized nodes is less than or equal to NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
		return
	}

	if len(lowNodes) == len(nodes) {
		klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
		return
	}

	if len(sourceNodes) == 0 {
		klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
		return
	}

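	// Build the evictability filter: only pods below the priority threshold
	// are candidates, and with nodeFit enabled a pod is evicted only if it
	// appears to fit on one of the other nodes.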
	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))

	// Stop evicting from a node once its utilization drops below the target
	// threshold, or once the underutilized nodes have no spare capacity left
	// for any of the tracked resources (cpu, memory, pods).
	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
			return false
		}
		for name := range totalAvailableUsage {
			if totalAvailableUsage[name].CmpInt64(0) < 1 {
				return false
			}
		}

		return true
	}

	// Sort the nodes by usage in descending order
	sortNodesByUsage(sourceNodes, false)

	evictPodsFromSourceNodes(
		ctx,
		sourceNodes,
		lowNodes,
		podEvictor,
		evictable.IsEvictable,
		resourceNames,
		"LowNodeUtilization",
		continueEvictionCond)

	klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
}

// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
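// For example, with useDeviationThresholds unset, thresholds of {cpu: 40}
// paired with targetThresholds of {cpu: 20} are rejected because 40 > 20,
// whereas the same pair is accepted in the deviation mode, where both values
// denote deviations from the mean utilization.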
func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error {
	// validate thresholds and targetThresholds config
	if err := validateThresholds(thresholds); err != nil {
		return fmt.Errorf("thresholds config is not valid: %v", err)
	}
	if err := validateThresholds(targetThresholds); err != nil {
		return fmt.Errorf("targetThresholds config is not valid: %v", err)
	}

	// validate that thresholds and targetThresholds have the same resources configured
	if len(thresholds) != len(targetThresholds) {
		return fmt.Errorf("thresholds and targetThresholds configured different resources")
	}
	for resourceName, value := range thresholds {
		if targetValue, ok := targetThresholds[resourceName]; !ok {
			return fmt.Errorf("thresholds and targetThresholds configured different resources")
		} else if value > targetValue && !useDeviationThresholds {
			return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
		}
	}
	return nil
}