Mirror of https://github.com/kubernetes-sigs/descheduler.git (synced 2026-01-26 05:14:13 +01:00)

Merge pull request #762 from ingvagabund/nodeutilization-refactor

Promote NodeUsage to NodeInfo, evaluate thresholds separately
@@ -63,16 +63,17 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
 	resourceNames := getResourceNames(targetThresholds)
 
 	sourceNodes, highNodes := classifyNodes(
-		getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
-		func(node *v1.Node, usage NodeUsage) bool {
-			return isNodeWithLowUtilization(usage)
+		getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
+		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
+			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		},
-		func(node *v1.Node, usage NodeUsage) bool {
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(node) {
 				klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
 				return false
 			}
-			return !isNodeWithLowUtilization(usage)
+			return !isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		})
 
 	// log message in one line
@@ -110,7 +111,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
 	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
 	// stop if the total available usage has dropped to zero - no more pods can be scheduled
-	continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
 		for name := range totalAvailableUsage {
 			if totalAvailableUsage[name].CmpInt64(0) < 1 {
 				return false
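A note on the stop condition above: Quantity.CmpInt64 returns -1, 0, or +1, so CmpInt64(0) < 1 is true exactly when the remaining budget is zero or negative. A minimal standalone sketch of that comparison (not descheduler code, just the apimachinery API):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	zero := resource.NewQuantity(0, resource.DecimalSI)
	pos := resource.NewQuantity(3, resource.DecimalSI)

	// CmpInt64 returns -1 if q < v, 0 if q == v, +1 if q > v, so
	// "CmpInt64(0) < 1" reads "quantity <= 0": the budget is exhausted.
	fmt.Println(zero.CmpInt64(0) < 1) // true
	fmt.Println(pos.CmpInt64(0) < 1)  // false
}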
@@ -73,17 +73,18 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	resourceNames := getResourceNames(thresholds)
 
 	lowNodes, sourceNodes := classifyNodes(
-		getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
+		getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
+		getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
 		// The node has to be schedulable (to be able to move workload there)
-		func(node *v1.Node, usage NodeUsage) bool {
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(node) {
 				klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
 				return false
 			}
-			return isNodeWithLowUtilization(usage)
+			return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
 		},
-		func(node *v1.Node, usage NodeUsage) bool {
-			return isNodeAboveTargetUtilization(usage)
+		func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
+			return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
 		},
 	)
@@ -138,8 +139,8 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
 	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
-	continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
-		if !isNodeAboveTargetUtilization(nodeUsage) {
+	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
 			return false
 		}
 		for name := range totalAvailableUsage {
@@ -36,12 +36,19 @@ type NodeUsage struct {
 	node    *v1.Node
 	usage   map[v1.ResourceName]*resource.Quantity
 	allPods []*v1.Pod
 }
 
-type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
+type NodeThresholds struct {
+	lowResourceThreshold  map[v1.ResourceName]*resource.Quantity
+	highResourceThreshold map[v1.ResourceName]*resource.Quantity
+}
+
+type NodeInfo struct {
+	NodeUsage
+	thresholds NodeThresholds
+}
+
+type continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
 
 // NodePodsMap is a set of (node, pods) pairs
 type NodePodsMap map[*v1.Node][]*v1.Pod
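The refactor leans on Go struct embedding: NodeInfo embeds NodeUsage, so fields like node and usage are promoted and existing nodeUsage.usage[...] call sites keep working as nodeInfo.usage[...], while the thresholds now travel alongside the usage instead of inside it. A minimal standalone sketch with simplified stand-in types (not the real descheduler definitions) showing the promotion:

package main

import "fmt"

// Simplified stand-ins for the descheduler types above.
type NodeUsage struct {
	name  string
	usage map[string]int64
}

type NodeThresholds struct {
	low, high map[string]int64
}

// Embedding promotes NodeUsage's fields onto NodeInfo.
type NodeInfo struct {
	NodeUsage
	thresholds NodeThresholds
}

func main() {
	info := NodeInfo{
		NodeUsage:  NodeUsage{name: "node-1", usage: map[string]int64{"cpu": 900}},
		thresholds: NodeThresholds{high: map[string]int64{"cpu": 800}},
	}
	// Promoted field access: old NodeUsage call sites compile unchanged.
	fmt.Println(info.name, info.usage["cpu"] > info.thresholds.high["cpu"]) // node-1 true
}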
@@ -77,10 +84,35 @@ func validateThresholds(thresholds api.ResourceThresholds) error {
 	return nil
 }
 
-func getNodeUsage(
+func getNodeThresholds(
 	nodes []*v1.Node,
 	lowThreshold, highThreshold api.ResourceThresholds,
 	resourceNames []v1.ResourceName,
-	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
-) []NodeUsage {
+) map[string]NodeThresholds {
+	nodeThresholdsMap := map[string]NodeThresholds{}
+	for _, node := range nodes {
+		nodeCapacity := node.Status.Capacity
+		if len(node.Status.Allocatable) > 0 {
+			nodeCapacity = node.Status.Allocatable
+		}
+
+		nodeThresholdsMap[node.Name] = NodeThresholds{
+			lowResourceThreshold:  map[v1.ResourceName]*resource.Quantity{},
+			highResourceThreshold: map[v1.ResourceName]*resource.Quantity{},
+		}
+
+		for _, resourceName := range resourceNames {
+			nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName])
+			nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName])
+		}
+	}
+	return nodeThresholdsMap
+}
+
+func getNodeUsage(
+	nodes []*v1.Node,
+	resourceNames []v1.ResourceName,
+	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
+) []NodeUsage {
 	var nodeUsageList []NodeUsage
@@ -92,48 +124,37 @@ func getNodeUsage(
 			continue
 		}
 
-		// A threshold is in percentages but in <0;100> interval.
-		// Performing `threshold * 0.01` will convert <0;100> interval into <0;1>.
-		// Multiplying it with capacity will give fraction of the capacity corresponding to the given high/low resource threshold in Quantity units.
-		nodeCapacity := node.Status.Capacity
-		if len(node.Status.Allocatable) > 0 {
-			nodeCapacity = node.Status.Allocatable
-		}
-		lowResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resource.NewMilliQuantity(int64(float64(lowThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
-			v1.ResourceMemory: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
-			v1.ResourcePods:   resource.NewQuantity(int64(float64(lowThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
-		}
-		for _, name := range resourceNames {
-			if !isBasicResource(name) {
-				cap := nodeCapacity[name]
-				lowResourceThreshold[name] = resource.NewQuantity(int64(float64(lowThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
-			}
-		}
-		highResourceThreshold := map[v1.ResourceName]*resource.Quantity{
-			v1.ResourceCPU:    resource.NewMilliQuantity(int64(float64(highThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
-			v1.ResourceMemory: resource.NewQuantity(int64(float64(highThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
-			v1.ResourcePods:   resource.NewQuantity(int64(float64(highThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
-		}
-		for _, name := range resourceNames {
-			if !isBasicResource(name) {
-				cap := nodeCapacity[name]
-				highResourceThreshold[name] = resource.NewQuantity(int64(float64(highThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
-			}
-		}
-
 		nodeUsageList = append(nodeUsageList, NodeUsage{
-			node:                  node,
-			usage:                 nodeUtilization(node, pods, resourceNames),
-			allPods:               pods,
-			lowResourceThreshold:  lowResourceThreshold,
-			highResourceThreshold: highResourceThreshold,
+			node:    node,
+			usage:   nodeUtilization(node, pods, resourceNames),
+			allPods: pods,
 		})
 	}
 
 	return nodeUsageList
 }
 
+func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity {
+	defaultFormat := resource.DecimalSI
+	if resourceName == v1.ResourceMemory {
+		defaultFormat = resource.BinarySI
+	}
+
+	resourceCapacityFraction := func(resourceNodeCapacity int64) int64 {
+		// A threshold is in percentages but in <0;100> interval.
+		// Performing `threshold * 0.01` will convert <0;100> interval into <0;1>.
+		// Multiplying it with capacity will give fraction of the capacity corresponding to the given resource threshold in Quantity units.
+		return int64(float64(threshold) * 0.01 * float64(resourceNodeCapacity))
+	}
+
+	resourceCapacityQuantity := nodeCapacity.Name(resourceName, defaultFormat)
+
+	if resourceName == v1.ResourceCPU {
+		return resource.NewMilliQuantity(resourceCapacityFraction(resourceCapacityQuantity.MilliValue()), defaultFormat)
+	}
+	return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat)
+}
+
 func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
 	nodeCapacity := nodeUsage.node.Status.Capacity
 	if len(nodeUsage.node.Status.Allocatable) > 0 {
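The arithmetic that resourceThreshold centralizes is easiest to see with numbers: a threshold of 20 on a 4-CPU node is 20 * 0.01 * 4000m = 800m. A standalone sketch of the same conversion using the apimachinery Quantity API (the capacity and percentage are made-up illustration values):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	capacity := resource.MustParse("4") // a 4-CPU node
	threshold := 20.0                   // percentage in the <0;100> interval

	// threshold * 0.01 maps <0;100> to <0;1>; multiplying by capacity
	// yields the threshold as a Quantity. CPU uses MilliValue to keep
	// millicore precision, as resourceThreshold does.
	milli := int64(threshold * 0.01 * float64(capacity.MilliValue()))
	fmt.Println(resource.NewMilliQuantity(milli, resource.DecimalSI)) // 800m
}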
@@ -155,19 +176,24 @@ func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
 // low and high thresholds, it is simply ignored.
 func classifyNodes(
 	nodeUsages []NodeUsage,
-	lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage) bool,
-) ([]NodeUsage, []NodeUsage) {
-	lowNodes, highNodes := []NodeUsage{}, []NodeUsage{}
+	nodeThresholds map[string]NodeThresholds,
+	lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool,
+) ([]NodeInfo, []NodeInfo) {
+	lowNodes, highNodes := []NodeInfo{}, []NodeInfo{}
 
 	for _, nodeUsage := range nodeUsages {
-		if lowThresholdFilter(nodeUsage.node, nodeUsage) {
-			klog.V(2).InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
-			lowNodes = append(lowNodes, nodeUsage)
-		} else if highThresholdFilter(nodeUsage.node, nodeUsage) {
-			klog.V(2).InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
-			highNodes = append(highNodes, nodeUsage)
+		nodeInfo := NodeInfo{
+			NodeUsage:  nodeUsage,
+			thresholds: nodeThresholds[nodeUsage.node.Name],
+		}
+		if lowThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
+			klog.InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			lowNodes = append(lowNodes, nodeInfo)
+		} else if highThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
+			klog.InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			highNodes = append(highNodes, nodeInfo)
 		} else {
-			klog.V(2).InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
+			klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
 		}
 	}
@@ -179,7 +205,7 @@ func classifyNodes(
 // TODO: @ravig Break this function into smaller functions.
 func evictPodsFromSourceNodes(
 	ctx context.Context,
-	sourceNodes, destinationNodes []NodeUsage,
+	sourceNodes, destinationNodes []NodeInfo,
 	podEvictor *evictions.PodEvictor,
 	podFilter func(pod *v1.Pod) bool,
 	resourceNames []v1.ResourceName,
@@ -204,7 +230,7 @@ func evictPodsFromSourceNodes(
 		if _, ok := totalAvailableUsage[name]; !ok {
 			totalAvailableUsage[name] = resource.NewQuantity(0, resource.DecimalSI)
 		}
-		totalAvailableUsage[name].Add(*node.highResourceThreshold[name])
+		totalAvailableUsage[name].Add(*node.thresholds.highResourceThreshold[name])
 		totalAvailableUsage[name].Sub(*node.usage[name])
 	}
 }
@@ -244,7 +270,7 @@ func evictPodsFromSourceNodes(
 func evictPods(
 	ctx context.Context,
 	inputPods []*v1.Pod,
-	nodeUsage NodeUsage,
+	nodeInfo NodeInfo,
 	totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
 	taintsOfLowNodes map[string][]v1.Taint,
 	podEvictor *evictions.PodEvictor,
@@ -252,14 +278,14 @@ func evictPods(
 	continueEviction continueEvictionCond,
 ) {
 
-	if continueEviction(nodeUsage, totalAvailableUsage) {
+	if continueEviction(nodeInfo, totalAvailableUsage) {
 		for _, pod := range inputPods {
 			if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
 				klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
 				continue
 			}
 
-			success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
+			success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node, strategy)
 			if err != nil {
 				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
 				break
@@ -270,20 +296,20 @@ func evictPods(
 
 			for name := range totalAvailableUsage {
 				if name == v1.ResourcePods {
-					nodeUsage.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
+					nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
 					totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
 				} else {
 					quantity := utils.GetResourceRequestQuantity(pod, name)
-					nodeUsage.usage[name].Sub(quantity)
+					nodeInfo.usage[name].Sub(quantity)
 					totalAvailableUsage[name].Sub(quantity)
 				}
 			}
 
 			keysAndValues := []interface{}{
-				"node", nodeUsage.node.Name,
-				"CPU", nodeUsage.usage[v1.ResourceCPU].MilliValue(),
-				"Mem", nodeUsage.usage[v1.ResourceMemory].Value(),
-				"Pods", nodeUsage.usage[v1.ResourcePods].Value(),
+				"node", nodeInfo.node.Name,
+				"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
+				"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
+				"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
 			}
 			for name := range totalAvailableUsage {
 				if !isBasicResource(name) {
@@ -293,7 +319,7 @@ func evictPods(
 
 			klog.V(3).InfoS("Updated node usage", keysAndValues...)
 			// check if pods can be still evicted
-			if !continueEviction(nodeUsage, totalAvailableUsage) {
+			if !continueEviction(nodeInfo, totalAvailableUsage) {
 				break
 			}
 		}
@@ -302,7 +328,7 @@ func evictPods(
 }
 
 // sortNodesByUsage sorts nodes based on usage in descending order
-func sortNodesByUsage(nodes []NodeUsage) {
+func sortNodesByUsage(nodes []NodeInfo) {
 	sort.Slice(nodes, func(i, j int) bool {
 		ti := nodes[i].usage[v1.ResourceMemory].Value() + nodes[i].usage[v1.ResourceCPU].MilliValue() + nodes[i].usage[v1.ResourcePods].Value()
 		tj := nodes[j].usage[v1.ResourceMemory].Value() + nodes[j].usage[v1.ResourceCPU].MilliValue() + nodes[j].usage[v1.ResourcePods].Value()
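The hunk cuts off before the comparator returns; per the doc comment the sort is descending, so the closure presumably ends with return ti > tj. A toy sketch of the same sort.Slice pattern over a combined usage score (simplified data, not the real NodeInfo):

package main

import (
	"fmt"
	"sort"
)

type scored struct {
	name  string
	score int64 // stand-in for the memory + CPU-milli + pods sum
}

func main() {
	nodes := []scored{{"a", 1200}, {"b", 5400}, {"c", 300}}
	// Descending order: most-utilized node first.
	sort.Slice(nodes, func(i, j int) bool { return nodes[i].score > nodes[j].score })
	fmt.Println(nodes) // [{b 5400} {a 1200} {c 300}]
}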
@@ -322,10 +348,10 @@ func sortNodesByUsage(nodes []NodeUsage) {
 
 // isNodeAboveTargetUtilization checks if a node is overutilized
 // At least one resource has to be above the high threshold
-func isNodeAboveTargetUtilization(usage NodeUsage) bool {
+func isNodeAboveTargetUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool {
 	for name, nodeValue := range usage.usage {
 		// usage.highResourceThreshold[name] < nodeValue
-		if usage.highResourceThreshold[name].Cmp(*nodeValue) == -1 {
+		if threshold[name].Cmp(*nodeValue) == -1 {
 			return true
 		}
 	}
@@ -334,10 +360,10 @@ func isNodeAboveTargetUtilization(usage NodeUsage) bool {
 
 // isNodeWithLowUtilization checks if a node is underutilized
 // All resources have to be below the low threshold
-func isNodeWithLowUtilization(usage NodeUsage) bool {
+func isNodeWithLowUtilization(usage NodeUsage, threshold map[v1.ResourceName]*resource.Quantity) bool {
 	for name, nodeValue := range usage.usage {
 		// usage.lowResourceThreshold[name] < nodeValue
-		if usage.lowResourceThreshold[name].Cmp(*nodeValue) == -1 {
+		if threshold[name].Cmp(*nodeValue) == -1 {
 			return false
 		}
 	}
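Both predicates rely on the same Quantity.Cmp convention: Cmp returns -1 when the receiver is smaller, so threshold[name].Cmp(*nodeValue) == -1 reads "threshold < usage". One resource over its high threshold marks a node overutilized; one resource over its low threshold disqualifies it as underutilized. A standalone sketch (made-up quantities):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	threshold := resource.MustParse("800m")
	usage := resource.MustParse("900m")

	// Cmp returns -1 if threshold < usage, 0 if equal, +1 if greater,
	// so == -1 means the usage exceeds the threshold.
	fmt.Println(threshold.Cmp(usage) == -1) // true
}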