Mirror of https://github.com/kubernetes-sigs/descheduler.git
Synced 2026-01-26 21:31:18 +01:00
feat: refactor thresholds and usage assessment
This commit refactors the thresholds and usage assessment for the node utilization plugins. Both the HighNodeUtilization and LowNodeUtilization plugins are affected by this change.
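
Editor's note: the heart of this refactor, visible in the Balance() hunks below, is that node usage and thresholds are both assessed as percentage maps (api.ResourceThresholds) and compared per resource by plain classifier functions; a node counts as underutilized only when every tracked resource sits below its threshold. A minimal, self-contained sketch of that comparison, with illustrative names rather than the descheduler API:

package main

import "fmt"

// resourceThresholds mirrors the shape of api.ResourceThresholds:
// a percentage per resource name.
type resourceThresholds map[string]float64

// isUnderutilized reports whether every tracked resource is below its
// threshold, matching the "underutilization" classifier in the diff below.
func isUnderutilized(usage, thresholds resourceThresholds) bool {
	for name, used := range usage {
		if used >= thresholds[name] {
			return false
		}
	}
	return true
}

func main() {
	usage := resourceThresholds{"cpu": 12.5, "memory": 30.0, "pods": 10.0}
	limits := resourceThresholds{"cpu": 20, "memory": 20, "pods": 20}
	fmt.Println(isUnderutilized(usage, limits)) // false: memory exceeds its threshold
}
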
@@ -28,6 +28,7 @@ import (
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
 	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
 )
 
@@ -43,6 +44,7 @@ type LowNodeUtilization struct {
 	underutilizationCriteria []interface{}
 	overutilizationCriteria  []interface{}
 	resourceNames            []v1.ResourceName
+	extendedResourceNames    []v1.ResourceName
 	usageClient              usageClient
 }
 
@@ -55,6 +57,9 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
 	}
 
+	resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
+	extendedResourceNames := resourceNames
+
 	metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
 	if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
 		if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
@@ -70,7 +75,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 			return nil, fmt.Errorf("prometheus query is missing")
 		}
 	} else {
 		setDefaultForLNUThresholds(lowNodeUtilizationArgsArgs.Thresholds, lowNodeUtilizationArgsArgs.TargetThresholds, lowNodeUtilizationArgsArgs.UseDeviationThresholds)
+		extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
 	}
 
 	underutilizationCriteria := []interface{}{
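
Editor's note: uniquifyResourceNames is referenced but not shown in this diff. A plausible implementation, sketched here purely for context (an assumption, not the repository's code), deduplicates the resource name list while preserving order:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// uniquifyResourceNames drops duplicate resource names, keeping the
// first occurrence of each. The real helper in the repository may differ.
func uniquifyResourceNames(names []v1.ResourceName) []v1.ResourceName {
	seen := map[v1.ResourceName]struct{}{}
	out := []v1.ResourceName{}
	for _, name := range names {
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		out = append(out, name)
	}
	return out
}

func main() {
	names := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceCPU, v1.ResourcePods}
	fmt.Println(uniquifyResourceNames(names)) // [cpu memory pods]
}
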
@@ -102,8 +107,6 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
 	}
 
-	resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
-
 	var usageClient usageClient
 	// MetricsServer is deprecated, removed once dropped
 	if metricsUtilization != nil {
@@ -112,7 +115,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		if handle.MetricsCollector() == nil {
 			return nil, fmt.Errorf("metrics client not initialized")
 		}
-		usageClient = newActualUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
+		usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
 	case metricsUtilization.Source == api.PrometheusMetrics:
 		if handle.PrometheusClient() == nil {
 			return nil, fmt.Errorf("prometheus client not initialized")
@@ -124,7 +127,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 			return nil, fmt.Errorf("metrics source is empty")
 		}
 	} else {
-		usageClient = newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc())
+		usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
 	}
 
 	return &LowNodeUtilization{
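
Editor's note: the two clients chosen above differ in where usage comes from: newActualUsageClient is handed the metrics collector, so it presumably reads live metrics, while newRequestedUsageClient only gets the pods-per-node function, so it presumably derives usage from pod resource requests. A self-contained sketch of the requests-based idea, with illustrative names (sumCPURequests is not the repository's code):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// sumCPURequests totals the CPU requests of all containers across the
// given pods, the raw material of a requests-based usage client.
func sumCPURequests(pods []v1.Pod) *resource.Quantity {
	total := resource.NewMilliQuantity(0, resource.DecimalSI)
	for _, pod := range pods {
		for _, container := range pod.Spec.Containers {
			if cpu, ok := container.Resources.Requests[v1.ResourceCPU]; ok {
				total.Add(cpu)
			}
		}
	}
	return total
}

func main() {
	pod := v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
		Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
			v1.ResourceCPU: resource.MustParse("250m"),
		}},
	}}}}
	fmt.Println(sumCPURequests([]v1.Pod{pod, pod})) // 500m
}
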
@@ -133,6 +136,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		underutilizationCriteria: underutilizationCriteria,
 		overutilizationCriteria:  overutilizationCriteria,
 		resourceNames:            resourceNames,
+		extendedResourceNames:    extendedResourceNames,
 		podFilter:                podFilter,
 		usageClient:              usageClient,
 	}, nil
@@ -152,25 +156,29 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	}
 
 	nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
-	var nodeThresholdsMap map[string][]api.ResourceThresholds
+	capacities := referencedResourceListForNodesCapacity(nodes)
+
+	var usage map[string]api.ResourceThresholds
+	var thresholds map[string][]api.ResourceThresholds
 	if l.args.UseDeviationThresholds {
-		thresholds, average := getNodeThresholdsFromAverageNodeUsage(nodes, l.usageClient, l.args.Thresholds, l.args.TargetThresholds)
-		klog.InfoS("Average utilization through all nodes", "utilization", average)
-		// All nodes are expected to have the same thresholds
-		for nodeName := range thresholds {
-			klog.InfoS("Underutilization threshold based on average utilization", "threshold", thresholds[nodeName][0])
-			klog.InfoS("Overutilization threshold based on average utilization", "threshold", thresholds[nodeName][1])
-			break
-		}
-		nodeThresholdsMap = thresholds
+		usage, thresholds = assessNodesUsagesAndRelativeThresholds(
+			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			capacities,
+			l.args.Thresholds,
+			l.args.TargetThresholds,
+		)
 	} else {
-		nodeThresholdsMap = getStaticNodeThresholds(nodes, l.args.Thresholds, l.args.TargetThresholds)
+		usage, thresholds = assessNodesUsagesAndStaticThresholds(
+			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			capacities,
+			l.args.Thresholds,
+			l.args.TargetThresholds,
+		)
 	}
 
-	nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
 	nodeGroups := classifyNodeUsage(
-		nodesUsageAsNodeThresholdsMap,
-		nodeThresholdsMap,
+		usage,
+		thresholds,
 		[]classifierFnc{
 			// underutilization
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
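
Editor's note: both assess* helpers above take node capacities alongside raw usage, which implies normalizing usage into percentages before classification. A minimal sketch of that normalization step, under the assumption that usage and capacity are plain per-resource numbers (the names here are illustrative, not the repository's code):

package main

import "fmt"

// usageToPercentages divides raw usage by node capacity, yielding the
// percentage form that the assess* helpers and classifiers work with.
func usageToPercentages(usage, capacity map[string]int64) map[string]float64 {
	out := map[string]float64{}
	for name, used := range usage {
		if total, ok := capacity[name]; ok && total > 0 {
			out[name] = 100 * float64(used) / float64(total)
		}
	}
	return out
}

func main() {
	usage := map[string]int64{"cpu": 1500, "memory": 2 << 30}
	capacity := map[string]int64{"cpu": 4000, "memory": 8 << 30}
	fmt.Println(usageToPercentages(usage, capacity)) // map[cpu:37.5 memory:25]
}
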
@@ -193,7 +201,12 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	listedNodes := map[string]struct{}{}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
-			klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				fmt.Sprintf("Node is %s", category[i]),
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 			listedNodes[nodeName] = struct{}{}
 			nodeInfos[i] = append(nodeInfos[i], NodeInfo{
 				NodeUsage: NodeUsage{
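
Editor's note: normalizer.Round is used above only to make the logged percentages readable. A sketch of what such a helper plausibly does (an assumption; the repository's normalizer.Round may differ in detail):

package main

import (
	"fmt"
	"math"
)

// roundPercentages rounds each per-resource percentage for
// human-readable log output.
func roundPercentages(values map[string]float64) map[string]float64 {
	out := make(map[string]float64, len(values))
	for name, v := range values {
		out[name] = math.Round(v)
	}
	return out
}

func main() {
	fmt.Println(roundPercentages(map[string]float64{"cpu": 37.5, "memory": 24.9}))
	// map[cpu:38 memory:25]
}
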
@@ -202,15 +215,21 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 					allPods: podListMap[nodeName],
 				},
 				thresholds: NodeThresholds{
-					lowResourceThreshold:  resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
-					highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
+					lowResourceThreshold:  resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
+					highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
 				},
 			})
 		}
 	}
 
 	for nodeName := range nodesMap {
 		if _, ok := listedNodes[nodeName]; !ok {
-			klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				"Node is appropriately utilized",
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 		}
 	}
 
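
Editor's note: after this change, resourceThresholdsToNodeUsage converts a percentage threshold back into absolute quantities against the node's capacity. A self-contained sketch of that direction of conversion, for a single resource (illustrative, not the repository's implementation):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// thresholdToQuantity turns a percentage threshold into an absolute
// quantity relative to a node capacity.
func thresholdToQuantity(percent float64, capacity resource.Quantity) *resource.Quantity {
	milli := int64(float64(capacity.MilliValue()) * percent / 100.0)
	return resource.NewMilliQuantity(milli, resource.DecimalSI)
}

func main() {
	capacity := resource.MustParse("4")            // 4 CPU cores
	fmt.Println(thresholdToQuantity(20, capacity)) // 800m
}
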
@@ -275,7 +294,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 		l.handle.Evictor(),
 		evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},
 		l.podFilter,
-		l.resourceNames,
+		l.extendedResourceNames,
 		continueEvictionCond,
 		l.usageClient,
 		nodeLimit,
@@ -283,34 +302,3 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 
 	return nil
 }
-
-func setDefaultForLNUThresholds(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) {
-	// check if Pods/CPU/Mem are set, if not, set them to 100
-	if _, ok := thresholds[v1.ResourcePods]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourcePods] = MinResourcePercentage
-			targetThresholds[v1.ResourcePods] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourcePods] = MaxResourcePercentage
-			targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-		}
-	}
-	if _, ok := thresholds[v1.ResourceCPU]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourceCPU] = MinResourcePercentage
-			targetThresholds[v1.ResourceCPU] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourceCPU] = MaxResourcePercentage
-			targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-		}
-	}
-	if _, ok := thresholds[v1.ResourceMemory]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourceMemory] = MinResourcePercentage
-			targetThresholds[v1.ResourceMemory] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourceMemory] = MaxResourcePercentage
-			targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-		}
-	}
-}
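
Editor's note: the helper removed above defaults any unset Pods/CPU/Memory threshold to the maximum percentage, or to the minimum when deviation thresholds are used. The same semantics expressed compactly as a loop; whether the refactored code in the repository does it this way is an assumption, and the 0/100 constant values are assumed too:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// Assumed values of the repository's MinResourcePercentage and
// MaxResourcePercentage constants.
const (
	minResourcePercentage = 0
	maxResourcePercentage = 100
)

// setDefaults fills in Pods/CPU/Memory thresholds that the user left
// unset, mirroring the deleted setDefaultForLNUThresholds.
func setDefaults(thresholds, targetThresholds map[v1.ResourceName]float64, useDeviation bool) {
	def := float64(maxResourcePercentage)
	if useDeviation {
		def = minResourcePercentage
	}
	for _, name := range []v1.ResourceName{v1.ResourcePods, v1.ResourceCPU, v1.ResourceMemory} {
		if _, ok := thresholds[name]; !ok {
			thresholds[name] = def
			targetThresholds[name] = def
		}
	}
}

func main() {
	thresholds := map[v1.ResourceName]float64{v1.ResourceCPU: 20}
	targets := map[v1.ResourceName]float64{v1.ResourceCPU: 50}
	setDefaults(thresholds, targets, false)
	fmt.Println(thresholds) // map[cpu:20 memory:100 pods:100]
}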