1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 21:31:18 +01:00

feat: refactoring thresholds and usage assessment

this commit refactors the thresholds and usage assessment for the node
utilization plugins. both high and low plugins are affected by this
change.
This commit is contained in:
Ricardo Maraschini
2025-03-13 20:52:20 +01:00
parent b300faece0
commit 87ba84b2ad
9 changed files with 1233 additions and 240 deletions

View File

@@ -28,6 +28,7 @@ import (
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
)
@@ -43,6 +44,7 @@ type LowNodeUtilization struct {
underutilizationCriteria []interface{}
overutilizationCriteria []interface{}
resourceNames []v1.ResourceName
extendedResourceNames []v1.ResourceName
usageClient usageClient
}
@@ -55,6 +57,9 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
}
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
extendedResourceNames := resourceNames
metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
@@ -70,7 +75,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
return nil, fmt.Errorf("prometheus query is missing")
}
} else {
setDefaultForLNUThresholds(lowNodeUtilizationArgsArgs.Thresholds, lowNodeUtilizationArgsArgs.TargetThresholds, lowNodeUtilizationArgsArgs.UseDeviationThresholds)
extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
}
underutilizationCriteria := []interface{}{
@@ -102,8 +107,6 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
return nil, fmt.Errorf("error initializing pod filter function: %v", err)
}
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
var usageClient usageClient
// MetricsServer is deprecated, removed once dropped
if metricsUtilization != nil {
@@ -112,7 +115,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
usageClient = newActualUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
case metricsUtilization.Source == api.PrometheusMetrics:
if handle.PrometheusClient() == nil {
return nil, fmt.Errorf("prometheus client not initialized")
@@ -124,7 +127,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
return nil, fmt.Errorf("metrics source is empty")
}
} else {
usageClient = newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc())
usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
}
return &LowNodeUtilization{
@@ -133,6 +136,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
underutilizationCriteria: underutilizationCriteria,
overutilizationCriteria: overutilizationCriteria,
resourceNames: resourceNames,
extendedResourceNames: extendedResourceNames,
podFilter: podFilter,
usageClient: usageClient,
}, nil
@@ -152,25 +156,29 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
var nodeThresholdsMap map[string][]api.ResourceThresholds
capacities := referencedResourceListForNodesCapacity(nodes)
var usage map[string]api.ResourceThresholds
var thresholds map[string][]api.ResourceThresholds
if l.args.UseDeviationThresholds {
thresholds, average := getNodeThresholdsFromAverageNodeUsage(nodes, l.usageClient, l.args.Thresholds, l.args.TargetThresholds)
klog.InfoS("Average utilization through all nodes", "utilization", average)
// All nodes are expected to have the same thresholds
for nodeName := range thresholds {
klog.InfoS("Underutilization threshold based on average utilization", "threshold", thresholds[nodeName][0])
klog.InfoS("Overutilization threshold based on average utilization", "threshold", thresholds[nodeName][1])
break
}
nodeThresholdsMap = thresholds
usage, thresholds = assessNodesUsagesAndRelativeThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
} else {
nodeThresholdsMap = getStaticNodeThresholds(nodes, l.args.Thresholds, l.args.TargetThresholds)
usage, thresholds = assessNodesUsagesAndStaticThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
}
nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
nodeGroups := classifyNodeUsage(
nodesUsageAsNodeThresholdsMap,
nodeThresholdsMap,
usage,
thresholds,
[]classifierFnc{
// underutilization
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
@@ -193,7 +201,12 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
listedNodes := map[string]struct{}{}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
klog.InfoS(
fmt.Sprintf("Node is %s", category[i]),
"node", klog.KObj(nodesMap[nodeName]),
"usage", nodesUsageMap[nodeName],
"usagePercentage", normalizer.Round(usage[nodeName]),
)
listedNodes[nodeName] = struct{}{}
nodeInfos[i] = append(nodeInfos[i], NodeInfo{
NodeUsage: NodeUsage{
@@ -202,15 +215,21 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
allPods: podListMap[nodeName],
},
thresholds: NodeThresholds{
lowResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
lowResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
},
})
}
}
for nodeName := range nodesMap {
if _, ok := listedNodes[nodeName]; !ok {
klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
klog.InfoS(
"Node is appropriately utilized",
"node", klog.KObj(nodesMap[nodeName]),
"usage", nodesUsageMap[nodeName],
"usagePercentage", normalizer.Round(usage[nodeName]),
)
}
}
@@ -275,7 +294,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
l.handle.Evictor(),
evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},
l.podFilter,
l.resourceNames,
l.extendedResourceNames,
continueEvictionCond,
l.usageClient,
nodeLimit,
@@ -283,34 +302,3 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return nil
}
func setDefaultForLNUThresholds(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) {
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
if useDeviationThresholds {
thresholds[v1.ResourcePods] = MinResourcePercentage
targetThresholds[v1.ResourcePods] = MinResourcePercentage
} else {
thresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
}
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
if useDeviationThresholds {
thresholds[v1.ResourceCPU] = MinResourcePercentage
targetThresholds[v1.ResourceCPU] = MinResourcePercentage
} else {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
}
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
if useDeviationThresholds {
thresholds[v1.ResourceMemory] = MinResourcePercentage
targetThresholds[v1.ResourceMemory] = MinResourcePercentage
} else {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
}
}
}