1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

feat: refactoring thresholds and usage assessment

This commit refactors the thresholds and usage assessment for the node
utilization plugins. Both the high and low utilization plugins are
affected by this change.
This commit is contained in:
Ricardo Maraschini
2025-03-13 20:52:20 +01:00
parent b300faece0
commit 87ba84b2ad
9 changed files with 1233 additions and 240 deletions

View File

@@ -18,7 +18,7 @@ package nodeutilization
import (
"context"
"math"
"maps"
"slices"
"sort"
@@ -28,9 +28,11 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
"sigs.k8s.io/descheduler/pkg/utils"
)
@@ -85,84 +87,6 @@ const (
MaxResourcePercentage = 100
)
// normalizePercentage clamps percent into the closed interval
// [MinResourcePercentage, MaxResourcePercentage]. A value already inside the
// interval is returned unchanged.
func normalizePercentage(percent api.Percentage) api.Percentage {
	switch {
	case percent > MaxResourcePercentage:
		return MaxResourcePercentage
	case percent < MinResourcePercentage:
		return MinResourcePercentage
	default:
		return percent
	}
}
// nodeCapacity returns the resource list to be used as the capacity of node:
// Status.Allocatable when it holds at least one entry, Status.Capacity
// otherwise. When nodeUsage carries an entry for MetricResource, the returned
// list additionally gets a MetricResource entry of 100, so that a metric
// expressed as a percentage maps 100% to 100 points.
//
// NOTE: the returned list is the node's own map, not a copy, so the
// MetricResource entry is written into the node status as well. The only
// exception is a node that reports neither capacity nor allocatable: in that
// case a fresh list is returned, because writing to the nil map would panic.
func nodeCapacity(node *v1.Node, nodeUsage api.ReferencedResourceList) v1.ResourceList {
	capacity := node.Status.Capacity
	if len(node.Status.Allocatable) > 0 {
		capacity = node.Status.Allocatable
	}

	// Nothing to add unless the usage captures the metrics resource.
	if _, ok := nodeUsage[MetricResource]; !ok {
		return capacity
	}

	// Assigning to an entry of a nil map panics; start from an empty list.
	if capacity == nil {
		capacity = v1.ResourceList{}
	}

	// Make ResourceMetrics 100% => 100 points
	capacity[MetricResource] = *resource.NewQuantity(int64(100), resource.DecimalSI)
	return capacity
}
// getNodeThresholdsFromAverageNodeUsage derives the low and high thresholds
// from the average utilization of nodes.
//
// For every node the usage reported by usageClient.nodeUtilization is turned
// into a percentage of the node capacity (MilliValue is used for CPU, Value
// for every other resource). The per-resource average over all nodes is then
// shifted down by lowSpan and up by highSpan, and both results are clamped
// with normalizePercentage. If either span of a resource is
// MinResourcePercentage, both thresholds for that resource are set to 1.
//
// A resource whose capacity is zero (or absent) counts as 0% for that node
// instead of dividing by zero, and nil usage quantities are skipped.
//
// It returns, keyed by node name, the pair [low, high] (every node refers to
// the same two threshold maps) together with the computed average.
func getNodeThresholdsFromAverageNodeUsage(
	nodes []*v1.Node,
	usageClient usageClient,
	lowSpan, highSpan api.ResourceThresholds,
) (map[string][]api.ResourceThresholds, api.ResourceThresholds) {
	total := api.ResourceThresholds{}
	average := api.ResourceThresholds{}
	numberOfNodes := len(nodes)

	for _, node := range nodes {
		usage := usageClient.nodeUtilization(node.Name)
		capacity := nodeCapacity(node, usage)
		for name, used := range usage {
			if used == nil {
				continue
			}
			limit := capacity[name]

			usedValue, limitValue := used.Value(), limit.Value()
			if name == v1.ResourceCPU {
				usedValue, limitValue = used.MilliValue(), limit.MilliValue()
			}

			// Without a positive capacity the ratio is undefined (+Inf or
			// NaN), which would poison the average; count it as 0%.
			var percent api.Percentage
			if limitValue > 0 {
				percent = api.Percentage(usedValue) / api.Percentage(limitValue) * 100.0
			}
			total[name] += percent
		}
	}

	lowThreshold, highThreshold := api.ResourceThresholds{}, api.ResourceThresholds{}
	for name, sum := range total {
		average[name] = sum / api.Percentage(numberOfNodes)

		// If either of the spans are 0, ignore the resource. I.e. 0%:5% is invalid.
		// Any zero span signifies a resource is either not set or is to be ignored.
		if lowSpan[name] == MinResourcePercentage || highSpan[name] == MinResourcePercentage {
			lowThreshold[name] = 1
			highThreshold[name] = 1
			continue
		}
		lowThreshold[name] = normalizePercentage(average[name] - lowSpan[name])
		highThreshold[name] = normalizePercentage(average[name] + highSpan[name])
	}

	nodeThresholds := make(map[string][]api.ResourceThresholds, numberOfNodes)
	for _, node := range nodes {
		nodeThresholds[node.Name] = []api.ResourceThresholds{
			lowThreshold,
			highThreshold,
		}
	}
	return nodeThresholds, average
}
// getStaticNodeThresholds maps every node name to its own copy of
// thresholdsList. Each node receives a distinct slice, but the threshold maps
// stored in those slices are the ones passed in by the caller.
func getStaticNodeThresholds(
	nodes []*v1.Node,
	thresholdsList ...api.ResourceThresholds,
) map[string][]api.ResourceThresholds {
	perNode := make(map[string][]api.ResourceThresholds, len(nodes))
	for _, node := range nodes {
		// Always a non-nil slice, even when no thresholds were given.
		copied := make([]api.ResourceThresholds, len(thresholdsList))
		copy(copied, thresholdsList)
		perNode[node.Name] = copied
	}
	return perNode
}
// getNodeUsageSnapshot separates the snapshot into easily accessible
// data chunks so the node usage can be processed separately.
func getNodeUsageSnapshot(
@@ -187,7 +111,7 @@ func getNodeUsageSnapshot(
return nodesMap, nodesUsageMap, podListMap
}
func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity {
func resourceThreshold(nodeCapacity api.ReferencedResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity {
defaultFormat := resource.DecimalSI
if resourceName == v1.ResourceMemory {
defaultFormat = resource.BinarySI
@@ -200,7 +124,10 @@ func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceNam
return int64(float64(threshold) * 0.01 * float64(resourceNodeCapacity))
}
resourceCapacityQuantity := nodeCapacity.Name(resourceName, defaultFormat)
resourceCapacityQuantity := &resource.Quantity{Format: defaultFormat}
if _, ok := nodeCapacity[resourceName]; ok {
resourceCapacityQuantity = nodeCapacity[resourceName]
}
if resourceName == v1.ResourceCPU {
return resource.NewMilliQuantity(resourceCapacityFraction(resourceCapacityQuantity.MilliValue()), defaultFormat)
@@ -208,47 +135,17 @@ func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceNam
return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat)
}
func resourceThresholdsToNodeUsage(resourceThresholds api.ResourceThresholds, node *v1.Node) api.ReferencedResourceList {
func resourceThresholdsToNodeUsage(resourceThresholds api.ResourceThresholds, capacity api.ReferencedResourceList, resourceNames []v1.ResourceName) api.ReferencedResourceList {
nodeUsage := make(api.ReferencedResourceList)
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
for resourceName, threshold := range resourceThresholds {
nodeUsage[resourceName] = resourceThreshold(nodeCapacity, resourceName, threshold)
nodeUsage[resourceName] = resourceThreshold(capacity, resourceName, threshold)
}
return nodeUsage
}
// roundTo2Decimals rounds percentage to two decimal places: the value is
// scaled by 100, rounded to the nearest integer with math.Round and scaled
// back.
func roundTo2Decimals(percentage float64) float64 {
	const scale = 100
	scaled := math.Round(percentage * scale)
	return scaled / scale
}
func resourceUsagePercentages(nodeUsage api.ReferencedResourceList, node *v1.Node, round bool) api.ResourceThresholds {
nodeCapacity := nodeCapacity(node, nodeUsage)
resourceUsagePercentage := api.ResourceThresholds{}
for resourceName, resourceUsage := range nodeUsage {
cap := nodeCapacity[resourceName]
if !cap.IsZero() {
value := 100 * float64(resourceUsage.MilliValue()) / float64(cap.MilliValue())
if round {
value = roundTo2Decimals(float64(value))
}
resourceUsagePercentage[resourceName] = api.Percentage(value)
for _, resourceName := range resourceNames {
if _, exists := nodeUsage[resourceName]; !exists {
nodeUsage[resourceName] = capacity[resourceName]
}
}
return resourceUsagePercentage
}
func nodeUsageToResourceThresholds(nodeUsage map[string]api.ReferencedResourceList, nodes map[string]*v1.Node) map[string]api.ResourceThresholds {
resourceThresholds := make(map[string]api.ResourceThresholds)
for nodeName, node := range nodes {
resourceThresholds[nodeName] = resourceUsagePercentages(nodeUsage[nodeName], node, false)
}
return resourceThresholds
return nodeUsage
}
// classifierFnc is a predicate evaluated for a single node. It receives the
// node name together with two api.ResourceThresholds, value and threshold,
// and returns a boolean verdict.
// NOTE(review): presumably true means the node belongs to the class being
// tested; confirm against the call sites.
type classifierFnc func(nodeName string, value, threshold api.ResourceThresholds) bool
@@ -503,8 +400,8 @@ func isNodeAboveTargetUtilization(usage NodeUsage, threshold api.ReferencedResou
// isNodeAboveThreshold checks if a node is over a threshold
// At least one resource has to be above the threshold
func isNodeAboveThreshold(usage, threshold api.ResourceThresholds) bool {
for name, resourceValue := range usage {
if threshold[name] < resourceValue {
for name := range threshold {
if threshold[name] < usage[name] {
return true
}
}
@@ -514,8 +411,8 @@ func isNodeAboveThreshold(usage, threshold api.ResourceThresholds) bool {
// isNodeBelowThreshold checks if a node is under a threshold
// All resources have to be below the threshold
func isNodeBelowThreshold(usage, threshold api.ResourceThresholds) bool {
for name, resourceValue := range usage {
if threshold[name] < resourceValue {
for name := range threshold {
if threshold[name] < usage[name] {
return false
}
}
@@ -544,3 +441,155 @@ func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*
return nonRemovablePods, removablePods
}
// assessNodesUsagesAndStaticThresholds expresses the raw usage of every node
// as a percentage and pairs each node with the same, user provided,
// thresholds. It returns the usage (pct) per node and, per node, the
// thresholds (pct) in the order low, high.
func assessNodesUsagesAndStaticThresholds(
	rawUsages, rawCapacities map[string]api.ReferencedResourceList,
	lowSpan, highSpan api.ResourceThresholds,
) (map[string]api.ResourceThresholds, map[string][]api.ResourceThresholds) {
	// Turn the raw quantities (Mi, Gi, etc) into api.Percentage values.
	usage := normalizer.Normalize(
		rawUsages, rawCapacities, ResourceUsageToResourceThreshold,
	)

	// No average and no deviation here: the very same pair of thresholds
	// is replicated for every node present in the usage map.
	nodeNames := slices.Collect(maps.Keys(usage))
	static := []api.ResourceThresholds{lowSpan, highSpan}

	return usage, normalizer.Replicate(nodeNames, static)
}
// assessNodesUsagesAndRelativeThresholds expresses the raw usage of every
// node as a percentage and computes thresholds relative to the average
// usage: the low threshold is the average minus lowSpan, the high threshold
// is the average plus highSpan. It returns the usage (pct) per node and, per
// node, the thresholds (pct) in the order low, high.
func assessNodesUsagesAndRelativeThresholds(
	rawUsages, rawCapacities map[string]api.ReferencedResourceList,
	lowSpan, highSpan api.ResourceThresholds,
) (map[string]api.ResourceThresholds, map[string][]api.ResourceThresholds) {
	// Turn the raw quantities (Mi, Gi, etc) into api.Percentage values.
	usage := normalizer.Normalize(
		rawUsages, rawCapacities, ResourceUsageToResourceThreshold,
	)

	// The average usage is the reference point for both thresholds.
	average := normalizer.Average(usage)

	nodeNames := slices.Collect(maps.Keys(usage))

	// Deviate the average according to the user provided spans.
	deviated := []api.ResourceThresholds{
		normalizer.Sum(average, normalizer.Negate(lowSpan)),
		normalizer.Sum(average, highSpan),
	}

	// After the deviation every value is clamped with bounds 0 and 100.
	clamp := func(thresholds api.ResourceThresholds) api.ResourceThresholds {
		return normalizer.Clamp(thresholds, 0, 100)
	}

	// Finally replicate the resulting pair across all nodes.
	return usage, normalizer.Replicate(nodeNames, normalizer.Map(deviated, clamp))
}
// referencedResourceListForNodesCapacity builds, for every node, a
// ReferencedResourceList describing its capacity, keyed by node name. The
// node's Status.Allocatable is used when it has at least one entry,
// Status.Capacity otherwise. Every list also carries a MetricResource entry
// fixed at 100.
func referencedResourceListForNodesCapacity(nodes []*v1.Node) map[string]api.ReferencedResourceList {
	capacities := make(map[string]api.ReferencedResourceList, len(nodes))
	for _, node := range nodes {
		source := node.Status.Allocatable
		if len(source) == 0 {
			source = node.Status.Capacity
		}

		// +1 leaves room for the MetricResource entry added below.
		referenced := make(api.ReferencedResourceList, len(source)+1)
		for name, quantity := range source {
			referenced[name] = ptr.To(quantity)
		}

		// XXX the descheduler also manages monitoring queries that are
		// supposed to return a value representing a percentage of the
		// resource usage. The node capacity has no such resource, so a
		// value of 100 is provided for MetricResource here.
		referenced[MetricResource] = resource.NewQuantity(100, resource.DecimalSI)

		capacities[node.Name] = referenced
	}
	return capacities
}
// ResourceUsageToResourceThreshold is an implementation of a Normalizer that
// converts a set of resource usages and totals into percentages. CPU is
// compared using Quantity MilliValue(), every other resource using Value().
//
// Resources whose usage or total is nil are left out of the result. A total
// that is not positive yields 0 for that resource.
func ResourceUsageToResourceThreshold(
	usages, totals api.ReferencedResourceList,
) api.ResourceThresholds {
	percentages := api.ResourceThresholds{}
	for name, usedQuantity := range usages {
		totalQuantity := totals[name]
		if usedQuantity == nil || totalQuantity == nil {
			continue
		}

		var used, capacity int64
		if name == v1.ResourceCPU {
			used, capacity = usedQuantity.MilliValue(), totalQuantity.MilliValue()
		} else {
			used, capacity = usedQuantity.Value(), totalQuantity.Value()
		}

		// Avoid dividing by a zero (or negative) total.
		if capacity <= 0 {
			percentages[name] = 0
			continue
		}
		percentages[name] = api.Percentage(float64(used) / float64(capacity) * 100)
	}
	return percentages
}
// uniquifyResourceNames returns resourceNames with duplicates removed. The
// result always contains v1.ResourceCPU, v1.ResourceMemory and
// v1.ResourcePods, whether or not they appear in resourceNames.
//
// The order is deterministic: the three resources above come first, followed
// by the remaining names in the order they are first seen in resourceNames.
func uniquifyResourceNames(resourceNames []v1.ResourceName) []v1.ResourceName {
	unique := []v1.ResourceName{
		v1.ResourceCPU,
		v1.ResourceMemory,
		v1.ResourcePods,
	}

	seen := make(map[v1.ResourceName]struct{}, len(unique)+len(resourceNames))
	for _, name := range unique {
		seen[name] = struct{}{}
	}

	for _, name := range resourceNames {
		if _, duplicate := seen[name]; duplicate {
			continue
		}
		seen[name] = struct{}{}
		unique = append(unique, name)
	}
	return unique
}
// filterResourceNamesFromNodeUsage returns a copy of the node usage map in
// which every node keeps only the resources listed in resourceNames. The
// input map is not modified; a node with none of the listed resources ends
// up with an empty list.
func filterResourceNamesFromNodeUsage(
	nodeUsage map[string]api.ReferencedResourceList, resourceNames []v1.ResourceName,
) map[string]api.ReferencedResourceList {
	filtered := make(map[string]api.ReferencedResourceList, len(nodeUsage))
	for nodeName, usage := range nodeUsage {
		kept := api.ReferencedResourceList{}
		for _, name := range resourceNames {
			if quantity, ok := usage[name]; ok {
				kept[name] = quantity
			}
		}
		filtered[nodeName] = kept
	}
	return filtered
}