mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-27 14:01:12 +01:00

chore: comment the code and simplify some things

this commit adds comments to the low and high utilization plugins. it also
simplifies things a little where possible without affecting the behaviour
too much.
Ricardo Maraschini
2025-03-19 19:56:36 +01:00
parent 87ba84b2ad
commit 54d0a22ad1
3 changed files with 671 additions and 406 deletions


@@ -34,120 +34,115 @@ import (
const LowNodeUtilizationPluginName = "LowNodeUtilization"
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
type LowNodeUtilization struct {
handle frameworktypes.Handle
args *LowNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
underutilizationCriteria []interface{}
overutilizationCriteria []interface{}
resourceNames []v1.ResourceName
extendedResourceNames []v1.ResourceName
usageClient usageClient
}
// this line makes sure that LowNodeUtilization implements the BalancePlugin
// interface.
var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
// NewLowNodeUtilization builds plugin from its arguments while passing a handle
func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
lowNodeUtilizationArgsArgs, ok := args.(*LowNodeUtilizationArgs)
// LowNodeUtilization evicts pods from overutilized nodes to underutilized
// nodes. Note that CPU/Memory requests are used to calculate nodes'
// utilization and not the actual resource usage.
type LowNodeUtilization struct {
handle frameworktypes.Handle
args *LowNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
underCriteria []any
overCriteria []any
resourceNames []v1.ResourceName
extendedResourceNames []v1.ResourceName
usageClient usageClient
}
// NewLowNodeUtilization builds the plugin from its arguments while passing a
// handle. this plugin aims to move workload from overutilized nodes to
// underutilized nodes.
func NewLowNodeUtilization(
genericArgs runtime.Object, handle frameworktypes.Handle,
) (frameworktypes.Plugin, error) {
args, ok := genericArgs.(*LowNodeUtilizationArgs)
if !ok {
return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
return nil, fmt.Errorf(
"want args to be of type LowNodeUtilizationArgs, got %T",
genericArgs,
)
}
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
// resourceNames holds a list of resources for which the user has
// provided thresholds. extendedResourceNames holds those as well as
// cpu, memory and pods if no prometheus collection is used.
resourceNames := getResourceNames(args.Thresholds)
extendedResourceNames := resourceNames
metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
uResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
oResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.TargetThresholds)
if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
return nil, fmt.Errorf("thresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, uResourceNames)
}
if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
return nil, fmt.Errorf("targetThresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, oResourceNames)
}
} else {
return nil, fmt.Errorf("prometheus query is missing")
// if we are using prometheus we need to validate we have everything we
// need. if we aren't then we need to make sure we are also collecting
// data for cpu, memory and pods.
metrics := args.MetricsUtilization
if metrics != nil && metrics.Source == api.PrometheusMetrics {
if err := validatePrometheusMetricsUtilization(args); err != nil {
return nil, err
}
} else {
extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
extendedResourceNames = uniquifyResourceNames(
append(
resourceNames,
v1.ResourceCPU,
v1.ResourceMemory,
v1.ResourcePods,
),
)
}
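uniquifyResourceNames is only referenced here; as a hypothetical sketch (not the actual helper), an order-preserving deduplication of resource names could look like the following, assuming v1 is "k8s.io/api/core/v1":

// hypothetical sketch of an order-preserving dedup helper.
func uniquifyResourceNamesSketch(names []v1.ResourceName) []v1.ResourceName {
	seen := map[v1.ResourceName]struct{}{}
	out := make([]v1.ResourceName, 0, len(names))
	for _, name := range names {
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		out = append(out, name)
	}
	return out
}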
underutilizationCriteria := []interface{}{
"CPU", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceCPU],
"Mem", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceMemory],
"Pods", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourcePods],
// underCriteria and overCriteria are slices used for logging purposes.
// we assemble them only once.
underCriteria, overCriteria := []any{}, []any{}
for name := range args.Thresholds {
underCriteria = append(underCriteria, name, args.Thresholds[name])
}
for name := range lowNodeUtilizationArgsArgs.Thresholds {
if !nodeutil.IsBasicResource(name) {
underutilizationCriteria = append(underutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.Thresholds[name]))
}
for name := range args.TargetThresholds {
overCriteria = append(overCriteria, name, args.TargetThresholds[name])
}
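The criteria slices are flat key/value lists so they can be expanded into a single structured log call later in Balance; a minimal illustration with made-up values:

// klog.InfoS takes alternating key/value pairs after the message, so a
// flat []any like this can be expanded with "...". Values are made up.
criteria := []any{"cpu", 20, "memory", 20, "pods", 20}
klog.V(1).InfoS("Criteria for a node under utilization", criteria...)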
overutilizationCriteria := []interface{}{
"CPU", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceCPU],
"Mem", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceMemory],
"Pods", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourcePods],
}
for name := range lowNodeUtilizationArgsArgs.TargetThresholds {
if !nodeutil.IsBasicResource(name) {
overutilizationCriteria = append(overutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.TargetThresholds[name]))
}
}
podFilter, err := podutil.NewOptions().
podFilter, err := podutil.
NewOptions().
WithFilter(handle.Evictor().Filter).
BuildFilterFunc()
if err != nil {
return nil, fmt.Errorf("error initializing pod filter function: %v", err)
}
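BuildFilterFunc returns a plain func(*v1.Pod) bool wrapping the evictor's filter; a hedged usage sketch, where somePod stands in for a hypothetical *v1.Pod:

if podFilter(somePod) {
	// somePod passes the evictor's filter and may be considered for eviction
}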
var usageClient usageClient
// MetricsServer is deprecated, removed once dropped
if metricsUtilization != nil {
switch {
case metricsUtilization.MetricsServer, metricsUtilization.Source == api.KubernetesMetrics:
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
case metricsUtilization.Source == api.PrometheusMetrics:
if handle.PrometheusClient() == nil {
return nil, fmt.Errorf("prometheus client not initialized")
}
usageClient = newPrometheusUsageClient(handle.GetPodsAssignedToNodeFunc(), handle.PrometheusClient(), metricsUtilization.Prometheus.Query)
case metricsUtilization.Source != "":
return nil, fmt.Errorf("unrecognized metrics source")
default:
return nil, fmt.Errorf("metrics source is empty")
// this plugin supports different ways of collecting usage data. each
// different way provides its own "usageClient". here we make sure we
// have the correct one or an error is triggered. XXX MetricsServer is
// deprecated, remove this once it is dropped.
var usageClient usageClient = newRequestedUsageClient(
extendedResourceNames, handle.GetPodsAssignedToNodeFunc(),
)
if metrics != nil {
usageClient, err = usageClientForMetrics(args, handle, extendedResourceNames)
if err != nil {
return nil, err
}
} else {
usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
}
return &LowNodeUtilization{
handle: handle,
args: lowNodeUtilizationArgsArgs,
underutilizationCriteria: underutilizationCriteria,
overutilizationCriteria: overutilizationCriteria,
resourceNames: resourceNames,
extendedResourceNames: extendedResourceNames,
podFilter: podFilter,
usageClient: usageClient,
handle: handle,
args: args,
underCriteria: underCriteria,
overCriteria: overCriteria,
resourceNames: resourceNames,
extendedResourceNames: extendedResourceNames,
podFilter: podFilter,
usageClient: usageClient,
}, nil
}
// Name retrieves the plugin name
// Name retrieves the plugin name.
func (l *LowNodeUtilization) Name() string {
return LowNodeUtilizationPluginName
}
// Balance extension point implementation for the plugin
// Balance holds the main logic of the plugin. It evicts pods from over
// utilized nodes to under utilized nodes. The goal here is to evenly
// distribute pods across nodes.
func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
if err := l.usageClient.sync(ctx, nodes); err != nil {
return &frameworktypes.Status{
@@ -155,75 +150,100 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
}
// start by taking a snapshot of the nodes' usage. we will use this
// snapshot to assess the nodes' usage and classify them as
// underutilized or overutilized.
nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
capacities := referencedResourceListForNodesCapacity(nodes)
// usage, by default, is exposed in absolute values. we need to normalize
// it (convert it to percentages) to be able to compare it with the
// user-provided thresholds. thresholds are already provided as percentages
// in the <0; 100> interval.
var usage map[string]api.ResourceThresholds
var thresholds map[string][]api.ResourceThresholds
if l.args.UseDeviationThresholds {
// here the thresholds provided by the user represent
// deviations from the average so we need to treat them
// differently. when calculating the average we only
// need to consider the resources for which the user
// has provided thresholds.
usage, thresholds = assessNodesUsagesAndRelativeThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
filterResourceNames(nodesUsageMap, l.resourceNames),
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
} else {
usage, thresholds = assessNodesUsagesAndStaticThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
nodesUsageMap,
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
}
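Normalization here simply means expressing usage as a fraction of node capacity; a simplified, hypothetical sketch of the idea (the real helpers operate on whole per-node resource lists):

// hypothetical sketch: express an absolute usage value as a percentage of
// capacity so it can be compared with thresholds in the <0; 100> interval.
func toPercentageSketch(used, capacity int64) float64 {
	if capacity == 0 {
		return 0
	}
	return float64(used) / float64(capacity) * 100
}
// e.g. toPercentageSketch(1500, 4000) == 37.5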
// classify nodes as under or over utilized. we will later try to move
// pods from the overutilized nodes to the underutilized ones.
nodeGroups := classifyNodeUsage(
usage,
thresholds,
usage, thresholds,
[]classifierFnc{
// underutilization
// underutilization criteria processing. nodes that are
// underutilized but aren't schedulable are ignored.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(nodesMap[nodeName]))
klog.V(2).InfoS(
"Node is unschedulable, thus not considered as underutilized",
"node", klog.KObj(nodesMap[nodeName]),
)
return false
}
return isNodeBelowThreshold(usage, threshold)
},
// overutilization
// overutilization criteria evaluation.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
return isNodeAboveThreshold(usage, threshold)
},
},
)
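Each classifier is a predicate over a node's normalized usage and its thresholds; a hedged illustration in the spirit of isNodeBelowThreshold (not the actual implementation):

// illustrative only: a node counts as below the threshold when every
// resource with a configured threshold sits under its limit.
func belowThresholdSketch(usage, threshold api.ResourceThresholds) bool {
	for name, limit := range threshold {
		if usage[name] >= limit {
			return false
		}
	}
	return true
}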
// convert groups node []NodeInfo
// the nodeutilization package was designed to work with NodeInfo
// structs. these structs hold information about how utilized a node
// is. we need to go through the result of the classification and turn
// it into NodeInfo structs.
nodeInfos := make([][]NodeInfo, 2)
category := []string{"underutilized", "overutilized"}
listedNodes := map[string]struct{}{}
categories := []string{"underutilized", "overutilized"}
classifiedNodes := map[string]bool{}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
classifiedNodes[nodeName] = true
klog.InfoS(
fmt.Sprintf("Node is %s", category[i]),
"Node has been classified",
"category", categories[i],
"node", klog.KObj(nodesMap[nodeName]),
"usage", nodesUsageMap[nodeName],
"usagePercentage", normalizer.Round(usage[nodeName]),
)
listedNodes[nodeName] = struct{}{}
nodeInfos[i] = append(nodeInfos[i], NodeInfo{
NodeUsage: NodeUsage{
node: nodesMap[nodeName],
usage: nodesUsageMap[nodeName], // get back the original node usage
usage: nodesUsageMap[nodeName],
allPods: podListMap[nodeName],
},
thresholds: NodeThresholds{
lowResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
},
available: capNodeCapacitiesToThreshold(
nodesMap[nodeName],
thresholds[nodeName][1],
l.extendedResourceNames,
),
})
}
}
// log nodes that are appropriately utilized.
for nodeName := range nodesMap {
if _, ok := listedNodes[nodeName]; !ok {
if !classifiedNodes[nodeName] {
klog.InfoS(
"Node is appropriately utilized",
"node", klog.KObj(nodesMap[nodeName]),
@@ -233,24 +253,27 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
}
lowNodes := nodeInfos[0]
sourceNodes := nodeInfos[1]
lowNodes, highNodes := nodeInfos[0], nodeInfos[1]
// log message for nodes with low utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underutilizationCriteria...)
// log messages for nodes with low and high utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underCriteria...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
// log message for over utilized nodes
klog.V(1).InfoS("Criteria for a node above target utilization", l.overutilizationCriteria...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
klog.V(1).InfoS("Criteria for a node above target utilization", l.overCriteria...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(highNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
klog.V(1).InfoS(
"No node is underutilized, nothing to do here, you might tune your thresholds further",
)
return nil
}
if len(lowNodes) <= l.args.NumberOfNodes {
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", l.args.NumberOfNodes)
klog.V(1).InfoS(
"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
"underutilizedNodes", len(lowNodes),
"numberOfNodes", l.args.NumberOfNodes,
)
return nil
}
@@ -259,14 +282,15 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return nil
}
if len(sourceNodes) == 0 {
if len(highNodes) == 0 {
klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
return nil
}
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
// this is a stop condition for the eviction process. we stop as soon
// as the node usage drops below the threshold.
continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.available) {
return false
}
for name := range totalAvailableUsage {
@@ -278,8 +302,8 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return true
}
// Sort the nodes by the usage in descending order
sortNodesByUsage(sourceNodes, false)
// sort the nodes by the usage in descending order
sortNodesByUsage(highNodes, false)
var nodeLimit *uint
if l.args.EvictionLimits != nil {
@@ -289,7 +313,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
evictPodsFromSourceNodes(
ctx,
l.args.EvictableNamespaces,
sourceNodes,
highNodes,
lowNodes,
l.handle.Evictor(),
evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},
@@ -302,3 +326,66 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return nil
}
// validatePrometheusMetricsUtilization validates the Prometheus metrics
// utilization. XXX this should be done way earlier than this.
func validatePrometheusMetricsUtilization(args *LowNodeUtilizationArgs) error {
if args.MetricsUtilization.Prometheus == nil {
return fmt.Errorf("prometheus property is missing")
}
if args.MetricsUtilization.Prometheus.Query == "" {
return fmt.Errorf("prometheus query is missing")
}
uResourceNames := getResourceNames(args.Thresholds)
oResourceNames := getResourceNames(args.TargetThresholds)
if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
return fmt.Errorf(
"thresholds are expected to specify a single instance of %q resource, got %v instead",
MetricResource, uResourceNames,
)
}
if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
return fmt.Errorf(
"targetThresholds are expected to specify a single instance of %q resource, got %v instead",
MetricResource, oResourceNames,
)
}
return nil
}
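In other words, with a Prometheus source both threshold maps must contain exactly one entry, the special MetricResource; a hedged example of values that would pass this validation (the percentages are arbitrary):

// hypothetical thresholds for a Prometheus-backed run: only the special
// MetricResource may appear; the percentages are arbitrary.
thresholds := api.ResourceThresholds{MetricResource: 30}       // below 30% => underutilized
targetThresholds := api.ResourceThresholds{MetricResource: 70} // above 70% => overutilized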
// usageClientForMetrics returns the correct usage client based on the
// metrics source. XXX MetricsServer is deprecated, remove this once it is dropped.
func usageClientForMetrics(
args *LowNodeUtilizationArgs, handle frameworktypes.Handle, resources []v1.ResourceName,
) (usageClient, error) {
metrics := args.MetricsUtilization
switch {
case metrics.MetricsServer, metrics.Source == api.KubernetesMetrics:
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
return newActualUsageClient(
resources,
handle.GetPodsAssignedToNodeFunc(),
handle.MetricsCollector(),
), nil
case metrics.Source == api.PrometheusMetrics:
if handle.PrometheusClient() == nil {
return nil, fmt.Errorf("prometheus client not initialized")
}
return newPrometheusUsageClient(
handle.GetPodsAssignedToNodeFunc(),
handle.PrometheusClient(),
metrics.Prometheus.Query,
), nil
case metrics.Source != "":
return nil, fmt.Errorf("unrecognized metrics source")
default:
return nil, fmt.Errorf("metrics source is empty")
}
}