mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-27 14:01:12 +01:00

chore: comment the code and simplify some things

this commit adds comments to the low and high utilization plugins. it also
simplifies things a little where possible without affecting the behaviour
too much.
Ricardo Maraschini
2025-03-19 19:56:36 +01:00
parent 87ba84b2ad
commit 54d0a22ad1
3 changed files with 671 additions and 406 deletions


@@ -34,120 +34,115 @@ import (
const LowNodeUtilizationPluginName = "LowNodeUtilization"
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
type LowNodeUtilization struct {
handle frameworktypes.Handle
args *LowNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
underutilizationCriteria []interface{}
overutilizationCriteria []interface{}
resourceNames []v1.ResourceName
extendedResourceNames []v1.ResourceName
usageClient usageClient
}
// this line makes sure that LowNodeUtilization implements the BalancePlugin
// interface.
var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
// NewLowNodeUtilization builds plugin from its arguments while passing a handle
func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
lowNodeUtilizationArgsArgs, ok := args.(*LowNodeUtilizationArgs)
// LowNodeUtilization evicts pods from overutilized nodes to underutilized
// nodes. Note that CPU/Memory requests are used to calculate nodes'
// utilization and not the actual resource usage.
type LowNodeUtilization struct {
handle frameworktypes.Handle
args *LowNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
underCriteria []any
overCriteria []any
resourceNames []v1.ResourceName
extendedResourceNames []v1.ResourceName
usageClient usageClient
}
// NewLowNodeUtilization builds the plugin from its arguments while passing a
// handle. this plugin aims to move workload from overutilized nodes to
// underutilized nodes.
func NewLowNodeUtilization(
genericArgs runtime.Object, handle frameworktypes.Handle,
) (frameworktypes.Plugin, error) {
args, ok := genericArgs.(*LowNodeUtilizationArgs)
if !ok {
return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
return nil, fmt.Errorf(
"want args to be of type LowNodeUtilizationArgs, got %T",
genericArgs,
)
}
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
// resourceNames holds a list of resources for which the user has
// provided thresholds. extendedResourceNames holds those as well as
// cpu, memory and pods if no prometheus collection is used.
resourceNames := getResourceNames(args.Thresholds)
extendedResourceNames := resourceNames
metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
uResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
oResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.TargetThresholds)
if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
return nil, fmt.Errorf("thresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, uResourceNames)
}
if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
return nil, fmt.Errorf("targetThresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, oResourceNames)
}
} else {
return nil, fmt.Errorf("prometheus query is missing")
// if we are using prometheus we need to validate we have everything we
// need. if we aren't then we need to make sure we are also collecting
// data for cpu, memory and pods.
metrics := args.MetricsUtilization
if metrics != nil && metrics.Source == api.PrometheusMetrics {
if err := validatePrometheusMetricsUtilization(args); err != nil {
return nil, err
}
} else {
extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
extendedResourceNames = uniquifyResourceNames(
append(
resourceNames,
v1.ResourceCPU,
v1.ResourceMemory,
v1.ResourcePods,
),
)
}
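uniquifyResourceNames is only referenced here; as a hypothetical sketch (not the actual helper), an order-preserving deduplication of resource names could look like the following, assuming v1 is "k8s.io/api/core/v1":

// hypothetical sketch of an order-preserving dedup helper.
func uniquifyResourceNamesSketch(names []v1.ResourceName) []v1.ResourceName {
	seen := map[v1.ResourceName]struct{}{}
	out := make([]v1.ResourceName, 0, len(names))
	for _, name := range names {
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		out = append(out, name)
	}
	return out
}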
underutilizationCriteria := []interface{}{
"CPU", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceCPU],
"Mem", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceMemory],
"Pods", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourcePods],
// underCriteria and overCriteria are slices used for logging purposes.
// we assemble them only once.
underCriteria, overCriteria := []any{}, []any{}
for name := range args.Thresholds {
underCriteria = append(underCriteria, name, args.Thresholds[name])
}
for name := range lowNodeUtilizationArgsArgs.Thresholds {
if !nodeutil.IsBasicResource(name) {
underutilizationCriteria = append(underutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.Thresholds[name]))
}
for name := range args.TargetThresholds {
overCriteria = append(overCriteria, name, args.TargetThresholds[name])
}
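The criteria slices are flat key/value lists so they can be expanded into a single structured log call later in Balance; a minimal illustration with made-up values:

// klog.InfoS takes alternating key/value pairs after the message, so a
// flat []any like this can be expanded with "...". Values are made up.
criteria := []any{"cpu", 20, "memory", 20, "pods", 20}
klog.V(1).InfoS("Criteria for a node under utilization", criteria...)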
overutilizationCriteria := []interface{}{
"CPU", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceCPU],
"Mem", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceMemory],
"Pods", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourcePods],
}
for name := range lowNodeUtilizationArgsArgs.TargetThresholds {
if !nodeutil.IsBasicResource(name) {
overutilizationCriteria = append(overutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.TargetThresholds[name]))
}
}
podFilter, err := podutil.NewOptions().
podFilter, err := podutil.
NewOptions().
WithFilter(handle.Evictor().Filter).
BuildFilterFunc()
if err != nil {
return nil, fmt.Errorf("error initializing pod filter function: %v", err)
}
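BuildFilterFunc returns a plain func(*v1.Pod) bool wrapping the evictor's filter; a hedged usage sketch, where somePod stands in for a hypothetical *v1.Pod:

if podFilter(somePod) {
	// somePod passes the evictor's filter and may be considered for eviction
}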
var usageClient usageClient
// MetricsServer is deprecated, removed once dropped
if metricsUtilization != nil {
switch {
case metricsUtilization.MetricsServer, metricsUtilization.Source == api.KubernetesMetrics:
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
case metricsUtilization.Source == api.PrometheusMetrics:
if handle.PrometheusClient() == nil {
return nil, fmt.Errorf("prometheus client not initialized")
}
usageClient = newPrometheusUsageClient(handle.GetPodsAssignedToNodeFunc(), handle.PrometheusClient(), metricsUtilization.Prometheus.Query)
case metricsUtilization.Source != "":
return nil, fmt.Errorf("unrecognized metrics source")
default:
return nil, fmt.Errorf("metrics source is empty")
// this plugin supports different ways of collecting usage data. each
// different way provides its own "usageClient". here we make sure we
// have the correct one or an error is triggered. XXX MetricsServer is
// deprecated, remove this once it is dropped.
var usageClient usageClient = newRequestedUsageClient(
extendedResourceNames, handle.GetPodsAssignedToNodeFunc(),
)
if metrics != nil {
usageClient, err = usageClientForMetrics(args, handle, extendedResourceNames)
if err != nil {
return nil, err
}
} else {
usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
}
return &LowNodeUtilization{
handle: handle,
args: lowNodeUtilizationArgsArgs,
underutilizationCriteria: underutilizationCriteria,
overutilizationCriteria: overutilizationCriteria,
resourceNames: resourceNames,
extendedResourceNames: extendedResourceNames,
podFilter: podFilter,
usageClient: usageClient,
handle: handle,
args: args,
underCriteria: underCriteria,
overCriteria: overCriteria,
resourceNames: resourceNames,
extendedResourceNames: extendedResourceNames,
podFilter: podFilter,
usageClient: usageClient,
}, nil
}
// Name retrieves the plugin name
// Name retrieves the plugin name.
func (l *LowNodeUtilization) Name() string {
return LowNodeUtilizationPluginName
}
// Balance extension point implementation for the plugin
// Balance holds the main logic of the plugin. It evicts pods from over
// utilized nodes to under utilized nodes. The goal here is to evenly
// distribute pods across nodes.
func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
if err := l.usageClient.sync(ctx, nodes); err != nil {
return &frameworktypes.Status{
@@ -155,75 +150,100 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
}
// start by taking a snapshot of the nodes' usage. we will use this
// snapshot to assess the nodes' usage and classify them as
// underutilized or overutilized.
nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
capacities := referencedResourceListForNodesCapacity(nodes)
// usage, by default, is exposed in absolute values. we need to normalize
// it (convert it to percentages) to be able to compare it with the
// user-provided thresholds. thresholds are already provided as percentages
// in the <0; 100> interval.
var usage map[string]api.ResourceThresholds
var thresholds map[string][]api.ResourceThresholds
if l.args.UseDeviationThresholds {
// here the thresholds provided by the user represent
// deviations from the average so we need to treat them
// differently. when calculating the average we only
// need to consider the resources for which the user
// has provided thresholds.
usage, thresholds = assessNodesUsagesAndRelativeThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
filterResourceNames(nodesUsageMap, l.resourceNames),
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
} else {
usage, thresholds = assessNodesUsagesAndStaticThresholds(
filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
nodesUsageMap,
capacities,
l.args.Thresholds,
l.args.TargetThresholds,
)
}
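Normalization here simply means expressing usage as a fraction of node capacity; a simplified, hypothetical sketch of the idea (the real helpers operate on whole per-node resource lists):

// hypothetical sketch: express an absolute usage value as a percentage of
// capacity so it can be compared with thresholds in the <0; 100> interval.
func toPercentageSketch(used, capacity int64) float64 {
	if capacity == 0 {
		return 0
	}
	return float64(used) / float64(capacity) * 100
}
// e.g. toPercentageSketch(1500, 4000) == 37.5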
// classify nodes as under or over utilized. we will later try to move
// pods from the overutilized nodes to the underutilized ones.
nodeGroups := classifyNodeUsage(
usage,
thresholds,
usage, thresholds,
[]classifierFnc{
// underutilization
// underutilization criteria processing. nodes that are
// underutilized but aren't schedulable are ignored.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(nodesMap[nodeName]))
klog.V(2).InfoS(
"Node is unschedulable, thus not considered as underutilized",
"node", klog.KObj(nodesMap[nodeName]),
)
return false
}
return isNodeBelowThreshold(usage, threshold)
},
// overutilization
// overutilization criteria evaluation.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
return isNodeAboveThreshold(usage, threshold)
},
},
)
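Each classifier is a predicate over a node's normalized usage and its thresholds; a hedged illustration in the spirit of isNodeBelowThreshold (not the actual implementation):

// illustrative only: a node counts as below the threshold when every
// resource with a configured threshold sits under its limit.
func belowThresholdSketch(usage, threshold api.ResourceThresholds) bool {
	for name, limit := range threshold {
		if usage[name] >= limit {
			return false
		}
	}
	return true
}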
// convert groups node []NodeInfo
// the nodeutilization package was designed to work with NodeInfo
// structs. these structs hold information about how utilized a node
// is. we need to go through the result of the classification and turn
// it into NodeInfo structs.
nodeInfos := make([][]NodeInfo, 2)
category := []string{"underutilized", "overutilized"}
listedNodes := map[string]struct{}{}
categories := []string{"underutilized", "overutilized"}
classifiedNodes := map[string]bool{}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
classifiedNodes[nodeName] = true
klog.InfoS(
fmt.Sprintf("Node is %s", category[i]),
"Node has been classified",
"category", categories[i],
"node", klog.KObj(nodesMap[nodeName]),
"usage", nodesUsageMap[nodeName],
"usagePercentage", normalizer.Round(usage[nodeName]),
)
listedNodes[nodeName] = struct{}{}
nodeInfos[i] = append(nodeInfos[i], NodeInfo{
NodeUsage: NodeUsage{
node: nodesMap[nodeName],
usage: nodesUsageMap[nodeName], // get back the original node usage
usage: nodesUsageMap[nodeName],
allPods: podListMap[nodeName],
},
thresholds: NodeThresholds{
lowResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
},
available: capNodeCapacitiesToThreshold(
nodesMap[nodeName],
thresholds[nodeName][1],
l.extendedResourceNames,
),
})
}
}
// log nodes that are appropriately utilized.
for nodeName := range nodesMap {
if _, ok := listedNodes[nodeName]; !ok {
if !classifiedNodes[nodeName] {
klog.InfoS(
"Node is appropriately utilized",
"node", klog.KObj(nodesMap[nodeName]),
@@ -233,24 +253,27 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
}
lowNodes := nodeInfos[0]
sourceNodes := nodeInfos[1]
lowNodes, highNodes := nodeInfos[0], nodeInfos[1]
// log message for nodes with low utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underutilizationCriteria...)
// log messages for nodes with low and high utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underCriteria...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
// log message for over utilized nodes
klog.V(1).InfoS("Criteria for a node above target utilization", l.overutilizationCriteria...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
klog.V(1).InfoS("Criteria for a node above target utilization", l.overCriteria...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(highNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
klog.V(1).InfoS(
"No node is underutilized, nothing to do here, you might tune your thresholds further",
)
return nil
}
if len(lowNodes) <= l.args.NumberOfNodes {
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", l.args.NumberOfNodes)
klog.V(1).InfoS(
"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
"underutilizedNodes", len(lowNodes),
"numberOfNodes", l.args.NumberOfNodes,
)
return nil
}
@@ -259,14 +282,15 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return nil
}
if len(sourceNodes) == 0 {
if len(highNodes) == 0 {
klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
return nil
}
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
// this is a stop condition for the eviction process. we stop as soon
// as the node usage drops below the threshold.
continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.available) {
return false
}
for name := range totalAvailableUsage {
@@ -278,8 +302,8 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return true
}
// Sort the nodes by the usage in descending order
sortNodesByUsage(sourceNodes, false)
// sort the nodes by the usage in descending order
sortNodesByUsage(highNodes, false)
var nodeLimit *uint
if l.args.EvictionLimits != nil {
@@ -289,7 +313,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
evictPodsFromSourceNodes(
ctx,
l.args.EvictableNamespaces,
sourceNodes,
highNodes,
lowNodes,
l.handle.Evictor(),
evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},
@@ -302,3 +326,66 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
return nil
}
// validatePrometheusMetricsUtilization validates the Prometheus metrics
// utilization. XXX this should be done way earlier than this.
func validatePrometheusMetricsUtilization(args *LowNodeUtilizationArgs) error {
if args.MetricsUtilization.Prometheus == nil {
return fmt.Errorf("prometheus property is missing")
}
if args.MetricsUtilization.Prometheus.Query == "" {
return fmt.Errorf("prometheus query is missing")
}
uResourceNames := getResourceNames(args.Thresholds)
oResourceNames := getResourceNames(args.TargetThresholds)
if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
return fmt.Errorf(
"thresholds are expected to specify a single instance of %q resource, got %v instead",
MetricResource, uResourceNames,
)
}
if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
return fmt.Errorf(
"targetThresholds are expected to specify a single instance of %q resource, got %v instead",
MetricResource, oResourceNames,
)
}
return nil
}
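In other words, with a Prometheus source both threshold maps must contain exactly one entry, the special MetricResource; a hedged example of values that would pass this validation (the percentages are arbitrary):

// hypothetical thresholds for a Prometheus-backed run: only the special
// MetricResource may appear; the percentages are arbitrary.
thresholds := api.ResourceThresholds{MetricResource: 30}       // below 30% => underutilized
targetThresholds := api.ResourceThresholds{MetricResource: 70} // above 70% => overutilized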
// usageClientForMetrics returns the correct usage client based on the
// metrics source. XXX MetricsServer is deprecated, remove this once it is dropped.
func usageClientForMetrics(
args *LowNodeUtilizationArgs, handle frameworktypes.Handle, resources []v1.ResourceName,
) (usageClient, error) {
metrics := args.MetricsUtilization
switch {
case metrics.MetricsServer, metrics.Source == api.KubernetesMetrics:
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
return newActualUsageClient(
resources,
handle.GetPodsAssignedToNodeFunc(),
handle.MetricsCollector(),
), nil
case metrics.Source == api.PrometheusMetrics:
if handle.PrometheusClient() == nil {
return nil, fmt.Errorf("prometheus client not initialized")
}
return newPrometheusUsageClient(
handle.GetPodsAssignedToNodeFunc(),
handle.PrometheusClient(),
metrics.Prometheus.Query,
), nil
case metrics.Source != "":
return nil, fmt.Errorf("unrecognized metrics source")
default:
return nil, fmt.Errorf("metrics source is empty")
}
}