mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-27 14:01:12 +01:00
chore: comment the code and simplify some things
this commit comments the low and high utilization plugins. it also simplifies things a little where possible without affecting behavior too much.
@@ -34,120 +34,115 @@ import (
 const LowNodeUtilizationPluginName = "LowNodeUtilization"

-// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
-// to calculate nodes' utilization and not the actual resource usage.
-type LowNodeUtilization struct {
-	handle                   frameworktypes.Handle
-	args                     *LowNodeUtilizationArgs
-	podFilter                func(pod *v1.Pod) bool
-	underutilizationCriteria []interface{}
-	overutilizationCriteria  []interface{}
-	resourceNames            []v1.ResourceName
-	extendedResourceNames    []v1.ResourceName
-	usageClient              usageClient
-}
-
+// this line makes sure that LowNodeUtilization implements the BalancePlugin
+// interface.
 var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}

-// NewLowNodeUtilization builds plugin from its arguments while passing a handle
-func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plugin, error) {
-	lowNodeUtilizationArgsArgs, ok := args.(*LowNodeUtilizationArgs)
+// LowNodeUtilization evicts pods from overutilized nodes to underutilized
+// nodes. Note that CPU/Memory requests are used to calculate nodes'
+// utilization and not the actual resource usage.
+type LowNodeUtilization struct {
+	handle                frameworktypes.Handle
+	args                  *LowNodeUtilizationArgs
+	podFilter             func(pod *v1.Pod) bool
+	underCriteria         []any
+	overCriteria          []any
+	resourceNames         []v1.ResourceName
+	extendedResourceNames []v1.ResourceName
+	usageClient           usageClient
+}
+
+// NewLowNodeUtilization builds plugin from its arguments while passing a
+// handle. this plugin aims to move workload from overutilized nodes to
+// underutilized nodes.
+func NewLowNodeUtilization(
+	genericArgs runtime.Object, handle frameworktypes.Handle,
+) (frameworktypes.Plugin, error) {
+	args, ok := genericArgs.(*LowNodeUtilizationArgs)
 	if !ok {
-		return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
+		return nil, fmt.Errorf(
+			"want args to be of type LowNodeUtilizationArgs, got %T",
+			genericArgs,
+		)
 	}

-	resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
+	// resourceNames holds a list of resources for which the user has
+	// provided thresholds. extendedResourceNames holds those as well as
+	// cpu, memory and pods if no prometheus collection is used.
+	resourceNames := getResourceNames(args.Thresholds)
 	extendedResourceNames := resourceNames

-	metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
-	if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
-		if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
-			uResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
-			oResourceNames := getResourceNames(lowNodeUtilizationArgsArgs.TargetThresholds)
-			if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
-				return nil, fmt.Errorf("thresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, uResourceNames)
-			}
-			if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
-				return nil, fmt.Errorf("targetThresholds are expected to specify a single instance of %q resource, got %v instead", MetricResource, oResourceNames)
-			}
-		} else {
-			return nil, fmt.Errorf("prometheus query is missing")
-		}
+	// if we are using prometheus we need to validate we have everything we
+	// need. if we aren't then we need to make sure we are also collecting
+	// data for cpu, memory and pods.
+	metrics := args.MetricsUtilization
+	if metrics != nil && metrics.Source == api.PrometheusMetrics {
+		if err := validatePrometheusMetricsUtilization(args); err != nil {
+			return nil, err
+		}
 	} else {
-		extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
+		extendedResourceNames = uniquifyResourceNames(
+			append(
+				resourceNames,
+				v1.ResourceCPU,
+				v1.ResourceMemory,
+				v1.ResourcePods,
+			),
+		)
 	}
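
`uniquifyResourceNames` is referenced above but defined elsewhere in the package. A minimal sketch of what such a dedupe helper plausibly does, assuming order-preserving set semantics (not the repository's actual implementation):

```go
package sketch

import v1 "k8s.io/api/core/v1"

// drop repeated resource names while preserving order; duplicates appear
// when a user threshold for cpu/memory/pods meets the appended defaults.
func uniquifyResourceNamesSketch(names []v1.ResourceName) []v1.ResourceName {
	seen := map[v1.ResourceName]struct{}{}
	out := make([]v1.ResourceName, 0, len(names))
	for _, name := range names {
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		out = append(out, name)
	}
	return out
}
```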

-	underutilizationCriteria := []interface{}{
-		"CPU", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceCPU],
-		"Mem", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourceMemory],
-		"Pods", lowNodeUtilizationArgsArgs.Thresholds[v1.ResourcePods],
-	}
-	for name := range lowNodeUtilizationArgsArgs.Thresholds {
-		if !nodeutil.IsBasicResource(name) {
-			underutilizationCriteria = append(underutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.Thresholds[name]))
-		}
-	}
-
-	overutilizationCriteria := []interface{}{
-		"CPU", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceCPU],
-		"Mem", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourceMemory],
-		"Pods", lowNodeUtilizationArgsArgs.TargetThresholds[v1.ResourcePods],
-	}
-	for name := range lowNodeUtilizationArgsArgs.TargetThresholds {
-		if !nodeutil.IsBasicResource(name) {
-			overutilizationCriteria = append(overutilizationCriteria, string(name), int64(lowNodeUtilizationArgsArgs.TargetThresholds[name]))
-		}
-	}
+	// underCriteria and overCriteria are slices used for logging purposes.
+	// we assemble them only once.
+	underCriteria, overCriteria := []any{}, []any{}
+	for name := range args.Thresholds {
+		underCriteria = append(underCriteria, name, args.Thresholds[name])
+	}
+	for name := range args.TargetThresholds {
+		overCriteria = append(overCriteria, name, args.TargetThresholds[name])
+	}
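
The criteria slices are flat key/value pairs precisely because klog's structured logging takes alternating keys and values variadically; a runnable illustration with made-up threshold values:

```go
package main

import "k8s.io/klog/v2"

func main() {
	// same layout as underCriteria above: key, value, key, value, ...
	underCriteria := []any{"cpu", 20, "memory", 20, "pods", 20} // example values
	klog.InfoS("Criteria for a node under utilization", underCriteria...)
}
```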

-	podFilter, err := podutil.NewOptions().
+	podFilter, err := podutil.
+		NewOptions().
 		WithFilter(handle.Evictor().Filter).
 		BuildFilterFunc()
 	if err != nil {
 		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
 	}

-	var usageClient usageClient
-	// MetricsServer is deprecated, removed once dropped
-	if metricsUtilization != nil {
-		switch {
-		case metricsUtilization.MetricsServer, metricsUtilization.Source == api.KubernetesMetrics:
-			if handle.MetricsCollector() == nil {
-				return nil, fmt.Errorf("metrics client not initialized")
-			}
-			usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
-		case metricsUtilization.Source == api.PrometheusMetrics:
-			if handle.PrometheusClient() == nil {
-				return nil, fmt.Errorf("prometheus client not initialized")
-			}
-			usageClient = newPrometheusUsageClient(handle.GetPodsAssignedToNodeFunc(), handle.PrometheusClient(), metricsUtilization.Prometheus.Query)
-		case metricsUtilization.Source != "":
-			return nil, fmt.Errorf("unrecognized metrics source")
-		default:
-			return nil, fmt.Errorf("metrics source is empty")
-		}
-	} else {
-		usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
-	}
+	// this plugin supports different ways of collecting usage data. each
+	// different way provides its own "usageClient". here we make sure we
+	// have the correct one or an error is triggered. XXX MetricsServer is
+	// deprecated, removed once dropped.
+	var usageClient usageClient = newRequestedUsageClient(
+		extendedResourceNames, handle.GetPodsAssignedToNodeFunc(),
+	)
+	if metrics != nil {
+		usageClient, err = usageClientForMetrics(args, handle, extendedResourceNames)
+		if err != nil {
+			return nil, err
+		}
+	}
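
The `usageClient` interface itself is not part of this diff; only `sync` is visible later in `Balance`. A rough sketch of the contract implied by the three constructors here, where the getter method is purely hypothetical:

```go
package sketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// assumed shape: each collection strategy (requests, kubernetes metrics,
// prometheus) satisfies one small interface the plugin consumes.
type usageClientSketch interface {
	// sync refreshes the usage snapshot for the given nodes.
	sync(ctx context.Context, nodes []*v1.Node) error
	// nodeUtilization exposes the collected usage for one node
	// (hypothetical method, named for illustration only).
	nodeUtilization(node string) map[v1.ResourceName]*resource.Quantity
}
```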

 	return &LowNodeUtilization{
-		handle:                   handle,
-		args:                     lowNodeUtilizationArgsArgs,
-		underutilizationCriteria: underutilizationCriteria,
-		overutilizationCriteria:  overutilizationCriteria,
-		resourceNames:            resourceNames,
-		extendedResourceNames:    extendedResourceNames,
-		podFilter:                podFilter,
-		usageClient:              usageClient,
+		handle:                handle,
+		args:                  args,
+		underCriteria:         underCriteria,
+		overCriteria:          overCriteria,
+		resourceNames:         resourceNames,
+		extendedResourceNames: extendedResourceNames,
+		podFilter:             podFilter,
+		usageClient:           usageClient,
 	}, nil
 }

-// Name retrieves the plugin name
+// Name retrieves the plugin name.
 func (l *LowNodeUtilization) Name() string {
 	return LowNodeUtilizationPluginName
 }

-// Balance extension point implementation for the plugin
+// Balance holds the main logic of the plugin. It evicts pods from
+// overutilized nodes to underutilized nodes. The goal here is to evenly
+// distribute pods across nodes.
 func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 	if err := l.usageClient.sync(ctx, nodes); err != nil {
 		return &frameworktypes.Status{
@@ -155,75 +150,100 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 		}
 	}

+	// start by taking a snapshot of the nodes' usage. we will use this
+	// snapshot to assess the nodes' usage and classify them as
+	// underutilized or overutilized.
 	nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
 	capacities := referencedResourceListForNodesCapacity(nodes)

+	// usage, by default, is exposed in absolute values. we need to normalize
+	// them (convert them to percentages) to be able to compare them with the
+	// user provided thresholds. thresholds are already provided in percentage
+	// in the <0; 100> interval.
 	var usage map[string]api.ResourceThresholds
 	var thresholds map[string][]api.ResourceThresholds
 	if l.args.UseDeviationThresholds {
+		// here the thresholds provided by the user represent
+		// deviations from the average so we need to treat them
+		// differently. when calculating the average we only
+		// need to consider the resources for which the user
+		// has provided thresholds.
 		usage, thresholds = assessNodesUsagesAndRelativeThresholds(
-			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			filterResourceNames(nodesUsageMap, l.resourceNames),
 			capacities,
 			l.args.Thresholds,
 			l.args.TargetThresholds,
 		)
 	} else {
 		usage, thresholds = assessNodesUsagesAndStaticThresholds(
-			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			nodesUsageMap,
 			capacities,
 			l.args.Thresholds,
 			l.args.TargetThresholds,
 		)
 	}
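
To make the normalization concrete: a node with 4000m of CPU capacity and 2000m requested sits at 50%, which is then compared against percentage thresholds such as 20 (under) and 70 (over). A self-contained sketch of the arithmetic, not the package's assess* functions:

```go
package main

import "fmt"

func main() {
	// absolute usage and capacity, e.g. CPU in millicores.
	used, capacity := 2000.0, 4000.0
	usagePct := used / capacity * 100 // 50%

	// thresholds already live in the <0; 100> interval.
	under, over := 20.0, 70.0
	fmt.Printf("usage=%.0f%% under=%v over=%v\n",
		usagePct, usagePct < under, usagePct > over)
	// prints: usage=50% under=false over=false
}
```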

+	// classify nodes as under- and overutilized. we will later try to move
+	// pods from the overutilized nodes to the underutilized ones.
 	nodeGroups := classifyNodeUsage(
-		usage,
-		thresholds,
+		usage, thresholds,
 		[]classifierFnc{
-			// underutilization
+			// underutilization criteria processing. nodes that are
+			// underutilized but aren't schedulable are ignored.
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 				if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-					klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(nodesMap[nodeName]))
+					klog.V(2).InfoS(
+						"Node is unschedulable, thus not considered as underutilized",
+						"node", klog.KObj(nodesMap[nodeName]),
+					)
 					return false
 				}
 				return isNodeBelowThreshold(usage, threshold)
 			},
-			// overutilization
+			// overutilization criteria evaluation.
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 				return isNodeAboveThreshold(usage, threshold)
 			},
 		},
 	)
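
`classifyNodeUsage` is defined elsewhere in the package; conceptually it runs every node through the classifier list and buckets the first match, along the lines of this simplified sketch (single scalar threshold instead of per-resource maps):

```go
package sketch

// one bucket per classifier; a node lands in the first bucket whose
// classifier accepts it, mirroring the under/over split above.
type classifierFn func(node string, usage, threshold float64) bool

func classify(usage, thresholds map[string]float64, fns []classifierFn) []map[string]float64 {
	groups := make([]map[string]float64, len(fns))
	for i := range groups {
		groups[i] = map[string]float64{}
	}
	for node, u := range usage {
		for i, fn := range fns {
			if fn(node, u, thresholds[node]) {
				groups[i][node] = u
				break
			}
		}
	}
	return groups
}
```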

-	// convert groups node []NodeInfo
+	// the nodeutilization package was designed to work with NodeInfo
+	// structs. these structs hold information about how utilized a node
+	// is. we need to go through the result of the classification and turn
+	// it into NodeInfo structs.
 	nodeInfos := make([][]NodeInfo, 2)
-	category := []string{"underutilized", "overutilized"}
-	listedNodes := map[string]struct{}{}
+	categories := []string{"underutilized", "overutilized"}
+	classifiedNodes := map[string]bool{}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
+			classifiedNodes[nodeName] = true

 			klog.InfoS(
-				fmt.Sprintf("Node is %s", category[i]),
+				"Node has been classified",
+				"category", categories[i],
 				"node", klog.KObj(nodesMap[nodeName]),
 				"usage", nodesUsageMap[nodeName],
 				"usagePercentage", normalizer.Round(usage[nodeName]),
 			)
-			listedNodes[nodeName] = struct{}{}

 			nodeInfos[i] = append(nodeInfos[i], NodeInfo{
 				NodeUsage: NodeUsage{
 					node:    nodesMap[nodeName],
-					usage:   nodesUsageMap[nodeName], // get back the original node usage
+					usage:   nodesUsageMap[nodeName],
 					allPods: podListMap[nodeName],
 				},
-				thresholds: NodeThresholds{
-					lowResourceThreshold:  resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
-					highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
-				},
+				available: capNodeCapacitiesToThreshold(
+					nodesMap[nodeName],
+					thresholds[nodeName][1],
+					l.extendedResourceNames,
+				),
 			})
 		}
 	}
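
`capNodeCapacitiesToThreshold` (and the removed `resourceThresholdsToNodeUsage`) translate percentage thresholds back into absolute per-node quantities. The underlying arithmetic is roughly the following sketch; the helper name and rounding behavior are assumptions:

```go
package sketch

import "k8s.io/apimachinery/pkg/api/resource"

// turn a <0; 100> percentage into an absolute quantity against a node
// capacity, e.g. 70% of a 4000m CPU capacity -> 2800m.
func percentageOfCapacity(capacity *resource.Quantity, pct float64) *resource.Quantity {
	abs := int64(float64(capacity.MilliValue()) * pct / 100.0)
	return resource.NewMilliQuantity(abs, capacity.Format)
}
```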

+	// log nodes that are appropriately utilized.
 	for nodeName := range nodesMap {
-		if _, ok := listedNodes[nodeName]; !ok {
+		if !classifiedNodes[nodeName] {
 			klog.InfoS(
 				"Node is appropriately utilized",
 				"node", klog.KObj(nodesMap[nodeName]),
@@ -233,24 +253,27 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 		}
 	}

-	lowNodes := nodeInfos[0]
-	sourceNodes := nodeInfos[1]
+	lowNodes, highNodes := nodeInfos[0], nodeInfos[1]

-	// log message for nodes with low utilization
-	klog.V(1).InfoS("Criteria for a node under utilization", l.underutilizationCriteria...)
+	// log messages for nodes with low and high utilization
+	klog.V(1).InfoS("Criteria for a node under utilization", l.underCriteria...)
 	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

-	// log message for over utilized nodes
-	klog.V(1).InfoS("Criteria for a node above target utilization", l.overutilizationCriteria...)
-	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
+	klog.V(1).InfoS("Criteria for a node above target utilization", l.overCriteria...)
+	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(highNodes))

 	if len(lowNodes) == 0 {
-		klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
+		klog.V(1).InfoS(
+			"No node is underutilized, nothing to do here, you might tune your thresholds further",
+		)
 		return nil
 	}

 	if len(lowNodes) <= l.args.NumberOfNodes {
-		klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", l.args.NumberOfNodes)
+		klog.V(1).InfoS(
+			"Number of underutilized nodes is less than or equal to NumberOfNodes, nothing to do here",
+			"underutilizedNodes", len(lowNodes),
+			"numberOfNodes", l.args.NumberOfNodes,
+		)
 		return nil
 	}

@@ -259,14 +282,15 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 		return nil
 	}

-	if len(sourceNodes) == 0 {
+	if len(highNodes) == 0 {
 		klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
 		return nil
 	}

-	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
+	// this is a stop condition for the eviction process. we stop as soon
+	// as the node usage drops below the threshold.
 	continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool {
-		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.thresholds.highResourceThreshold) {
+		if !isNodeAboveTargetUtilization(nodeInfo.NodeUsage, nodeInfo.available) {
 			return false
 		}
 		for name := range totalAvailableUsage {

@@ -278,8 +302,8 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 		return true
 	}
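
The closure above is consulted between evictions: the eviction loop (defined elsewhere in the package) keeps going only while it returns true. A simplified model of that control flow, with a hypothetical `evictOne`:

```go
package sketch

// evict candidates one by one, re-checking the stop condition before
// each eviction; stop as soon as the node drops below its threshold.
func evictWhile(pods []string, continueEviction func() bool, evictOne func(pod string)) {
	for _, pod := range pods {
		if !continueEviction() {
			return
		}
		evictOne(pod)
	}
}
```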

-	// Sort the nodes by the usage in descending order
-	sortNodesByUsage(sourceNodes, false)
+	// sort the nodes by the usage in descending order
+	sortNodesByUsage(highNodes, false)

 	var nodeLimit *uint
 	if l.args.EvictionLimits != nil {

@@ -289,7 +313,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
 	evictPodsFromSourceNodes(
 		ctx,
 		l.args.EvictableNamespaces,
-		sourceNodes,
+		highNodes,
 		lowNodes,
 		l.handle.Evictor(),
 		evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},

@@ -302,3 +326,66 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {

 	return nil
 }
+
+// validatePrometheusMetricsUtilization validates the prometheus metrics
+// collection configuration. XXX this should be done way earlier than this.
+func validatePrometheusMetricsUtilization(args *LowNodeUtilizationArgs) error {
+	if args.MetricsUtilization.Prometheus == nil {
+		return fmt.Errorf("prometheus property is missing")
+	}
+
+	if args.MetricsUtilization.Prometheus.Query == "" {
+		return fmt.Errorf("prometheus query is missing")
+	}
+
+	uResourceNames := getResourceNames(args.Thresholds)
+	oResourceNames := getResourceNames(args.TargetThresholds)
+	if len(uResourceNames) != 1 || uResourceNames[0] != MetricResource {
+		return fmt.Errorf(
+			"thresholds are expected to specify a single instance of %q resource, got %v instead",
+			MetricResource, uResourceNames,
+		)
+	}
+
+	if len(oResourceNames) != 1 || oResourceNames[0] != MetricResource {
+		return fmt.Errorf(
+			"targetThresholds are expected to specify a single instance of %q resource, got %v instead",
+			MetricResource, oResourceNames,
+		)
+	}
+
+	return nil
+}
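
For illustration, an argument set that passes this validation: exactly one resource, the special `MetricResource`, in each thresholds map, plus a non-empty query. The stand-in types below only loosely mirror the fields this file reads; the real definitions live in the descheduler api/types packages, and all values are made up:

```go
package sketch

type resourceThresholds map[string]float64

type prometheusSpec struct{ Query string }

type metricsUtilization struct {
	Source     string
	Prometheus *prometheusSpec
}

type lowNodeUtilizationArgs struct {
	Thresholds         resourceThresholds
	TargetThresholds   resourceThresholds
	MetricsUtilization *metricsUtilization
}

// one MetricResource entry per map and a non-empty query: valid.
var example = lowNodeUtilizationArgs{
	Thresholds:       resourceThresholds{"MetricResource": 30},
	TargetThresholds: resourceThresholds{"MetricResource": 70},
	MetricsUtilization: &metricsUtilization{
		Source:     "Prometheus",
		Prometheus: &prometheusSpec{Query: "instance:node_cpu:rate:sum"},
	},
}
```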

+// usageClientForMetrics returns the correct usage client based on the
+// metrics source. XXX MetricsServer is deprecated, removed once dropped.
+func usageClientForMetrics(
+	args *LowNodeUtilizationArgs, handle frameworktypes.Handle, resources []v1.ResourceName,
+) (usageClient, error) {
+	metrics := args.MetricsUtilization
+	switch {
+	case metrics.MetricsServer, metrics.Source == api.KubernetesMetrics:
+		if handle.MetricsCollector() == nil {
+			return nil, fmt.Errorf("metrics client not initialized")
+		}
+		return newActualUsageClient(
+			resources,
+			handle.GetPodsAssignedToNodeFunc(),
+			handle.MetricsCollector(),
+		), nil
+
+	case metrics.Source == api.PrometheusMetrics:
+		if handle.PrometheusClient() == nil {
+			return nil, fmt.Errorf("prometheus client not initialized")
+		}
+		return newPrometheusUsageClient(
+			handle.GetPodsAssignedToNodeFunc(),
+			handle.PrometheusClient(),
+			metrics.Prometheus.Query,
+		), nil
+
+	case metrics.Source != "":
+		return nil, fmt.Errorf("unrecognized metrics source")
+
+	default:
+		return nil, fmt.Errorf("metrics source is empty")
+	}
+}