1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

nodeutilization: make the classification more generic

This commit is contained in:
Jan Chaloupka
2025-03-11 15:02:44 +01:00
parent b8e3c0bba3
commit 57bb31de78
4 changed files with 260 additions and 141 deletions

View File

@@ -101,19 +101,50 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
}
}
sourceNodes, highNodes := classifyNodes(
getNodeUsage(nodes, h.usageClient),
getNodeThresholds(nodes, h.args.Thresholds, h.targetThresholds, h.resourceNames, false, h.usageClient),
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
nodeThresholdsMap := getStaticNodeThresholds(nodes, h.args.Thresholds, h.targetThresholds)
nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
nodeGroups := classifyNodeUsage(
nodesUsageAsNodeThresholdsMap,
nodeThresholdsMap,
[]classifierFnc{
// underutilized nodes
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
return isNodeBelowThreshold(usage, threshold)
},
// every other node that is schedulable
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(nodesMap[nodeName]))
return false
}
return true
},
},
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
if nodeutil.IsNodeUnschedulable(node) {
klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
return false
}
return !isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
})
)
// convert groups node []NodeInfo
nodeInfos := make([][]NodeInfo, 2)
category := []string{"underutilized", "overutilized"}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
nodeInfos[i] = append(nodeInfos[i], NodeInfo{
NodeUsage: NodeUsage{
node: nodesMap[nodeName],
usage: nodesUsageMap[nodeName], // get back the original node usage
allPods: podListMap[nodeName],
},
thresholds: NodeThresholds{
lowResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
},
})
}
}
sourceNodes := nodeInfos[0]
highNodes := nodeInfos[1]
// log message in one line
klog.V(1).InfoS("Criteria for a node below target utilization", h.underutilizationCriteria...)

View File

@@ -122,22 +122,64 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
}
lowNodes, sourceNodes := classifyNodes(
getNodeUsage(nodes, l.usageClient),
getNodeThresholds(nodes, l.args.Thresholds, l.args.TargetThresholds, l.resourceNames, l.args.UseDeviationThresholds, l.usageClient),
// The node has to be schedulable (to be able to move workload there)
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
if nodeutil.IsNodeUnschedulable(node) {
klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
return false
}
return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
},
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
return isNodeAboveTargetUtilization(usage, threshold.highResourceThreshold)
nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
var nodeThresholdsMap map[string][]api.ResourceThresholds
if l.args.UseDeviationThresholds {
nodeThresholdsMap = getNodeThresholdsFromAverageNodeUsage(nodes, l.usageClient, l.args.Thresholds, l.args.TargetThresholds)
} else {
nodeThresholdsMap = getStaticNodeThresholds(nodes, l.args.Thresholds, l.args.TargetThresholds)
}
nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
nodeGroups := classifyNodeUsage(
nodesUsageAsNodeThresholdsMap,
nodeThresholdsMap,
[]classifierFnc{
// underutilization
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(nodesMap[nodeName]))
return false
}
return isNodeBelowThreshold(usage, threshold)
},
// overutilization
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
return isNodeAboveThreshold(usage, threshold)
},
},
)
// convert groups node []NodeInfo
nodeInfos := make([][]NodeInfo, 2)
category := []string{"underutilized", "overutilized"}
listedNodes := map[string]struct{}{}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
listedNodes[nodeName] = struct{}{}
nodeInfos[i] = append(nodeInfos[i], NodeInfo{
NodeUsage: NodeUsage{
node: nodesMap[nodeName],
usage: nodesUsageMap[nodeName], // get back the original node usage
allPods: podListMap[nodeName],
},
thresholds: NodeThresholds{
lowResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
},
})
}
}
for nodeName := range nodesMap {
if _, ok := listedNodes[nodeName]; !ok {
klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
}
}
lowNodes := nodeInfos[0]
sourceNodes := nodeInfos[1]
// log message for nodes with low utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underutilizationCriteria...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))

View File

@@ -19,6 +19,7 @@ package nodeutilization
import (
"context"
"math"
"slices"
"sort"
"sigs.k8s.io/descheduler/pkg/api"
@@ -34,6 +35,29 @@ import (
"sigs.k8s.io/descheduler/pkg/utils"
)
// []NodeUsage is a snapshot, so allPods can not be read any time to avoid breaking consistency between the node's actual usage and available pods
//
// New data model:
// - node usage: map[string]api.ReferencedResourceList
// - thresholds: map[string]api.ReferencedResourceList
// - all pods: map[string][]*v1.Pod
// After classification:
// - each group will have its own (smaller) node usage and thresholds and allPods
// Both node usage and thresholds are needed to compute the remaining resources that can be evicted/can accepted evicted pods
//
// 1. translate node usages into percentages as float or int64 (how much precision is lost?, maybe use BigInt?)
// 2. produce thresholds (if they need to be computed, otherwise use user provided, they are already in percentages)
// 3. classify nodes into groups
// 4. produces a list of nodes (sorted as before) that have the node usage, the threshold (only one this time) and the snapshottted pod list present
// Data wise
// Produce separated maps for:
// - nodes: map[string]*v1.Node
// - node usage: map[string]api.ReferencedResourceList
// - thresholds: map[string][]api.ReferencedResourceList
// - pod list: map[string][]*v1.Pod
// Once the nodes are classified produce the original []NodeInfo so the code is not that much changed (postponing further refactoring once it is needed)
// NodeUsage stores a node's info, pods on it, thresholds and its resource usage
type NodeUsage struct {
node *v1.Node
@@ -53,9 +77,6 @@ type NodeInfo struct {
type continueEvictionCond func(nodeInfo NodeInfo, totalAvailableUsage api.ReferencedResourceList) bool
// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
const (
// MinResourcePercentage is the minimum value of a resource's percentage
MinResourcePercentage = 0
@@ -73,66 +94,86 @@ func normalizePercentage(percent api.Percentage) api.Percentage {
return percent
}
func getNodeThresholds(
func getNodeThresholdsFromAverageNodeUsage(
nodes []*v1.Node,
lowThreshold, highThreshold api.ResourceThresholds,
resourceNames []v1.ResourceName,
useDeviationThresholds bool,
usageClient usageClient,
) map[string]NodeThresholds {
nodeThresholdsMap := map[string]NodeThresholds{}
averageResourceUsagePercent := api.ResourceThresholds{}
if useDeviationThresholds {
averageResourceUsagePercent = averageNodeBasicresources(nodes, usageClient)
}
lowSpan, highSpan api.ResourceThresholds,
) map[string][]api.ResourceThresholds {
total := api.ResourceThresholds{}
average := api.ResourceThresholds{}
numberOfNodes := len(nodes)
for _, node := range nodes {
usage := usageClient.nodeUtilization(node.Name)
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
nodeThresholdsMap[node.Name] = NodeThresholds{
lowResourceThreshold: api.ReferencedResourceList{},
highResourceThreshold: api.ReferencedResourceList{},
}
for _, resourceName := range resourceNames {
if useDeviationThresholds {
cap := nodeCapacity[resourceName]
if lowThreshold[resourceName] == MinResourcePercentage {
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = &cap
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = &cap
} else {
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]-lowThreshold[resourceName]))
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]+highThreshold[resourceName]))
}
for resource, value := range usage {
nodeCapacityValue := nodeCapacity[resource]
if resource == v1.ResourceCPU {
total[resource] += api.Percentage(value.MilliValue()) / api.Percentage(nodeCapacityValue.MilliValue()) * 100.0
} else {
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName])
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName])
total[resource] += api.Percentage(value.Value()) / api.Percentage(nodeCapacityValue.Value()) * 100.0
}
}
}
return nodeThresholdsMap
lowThreshold, highThreshold := api.ResourceThresholds{}, api.ResourceThresholds{}
for resource, value := range total {
average[resource] = value / api.Percentage(numberOfNodes)
// If either of the spans are 0, ignore the resource. I.e. 0%:5% is invalid.
// Any zero span signifies a resource is either not set or is to be ignored.
if lowSpan[resource] == MinResourcePercentage || highSpan[resource] == MinResourcePercentage {
lowThreshold[resource] = 1
highThreshold[resource] = 1
} else {
lowThreshold[resource] = normalizePercentage(average[resource] - lowSpan[resource])
highThreshold[resource] = normalizePercentage(average[resource] + highSpan[resource])
}
}
nodeThresholds := make(map[string][]api.ResourceThresholds)
for _, node := range nodes {
nodeThresholds[node.Name] = []api.ResourceThresholds{
lowThreshold,
highThreshold,
}
}
return nodeThresholds
}
func getNodeUsage(
func getStaticNodeThresholds(
nodes []*v1.Node,
thresholdsList ...api.ResourceThresholds,
) map[string][]api.ResourceThresholds {
nodeThresholds := make(map[string][]api.ResourceThresholds)
for _, node := range nodes {
nodeThresholds[node.Name] = append([]api.ResourceThresholds{}, slices.Clone(thresholdsList)...)
}
return nodeThresholds
}
// getNodeUsageSnapshot separates the snapshot into easily accesible
// data chunks so the node usage can be processed separately.
func getNodeUsageSnapshot(
nodes []*v1.Node,
usageClient usageClient,
) []NodeUsage {
var nodeUsageList []NodeUsage
) (
map[string]*v1.Node,
map[string]api.ReferencedResourceList,
map[string][]*v1.Pod,
) {
nodesMap := make(map[string]*v1.Node)
// node usage needs to be kept in the original resource quantity since converting to percentages and back is losing precision
nodesUsageMap := make(map[string]api.ReferencedResourceList)
podListMap := make(map[string][]*v1.Pod)
for _, node := range nodes {
nodeUsageList = append(nodeUsageList, NodeUsage{
node: node,
usage: usageClient.nodeUtilization(node.Name),
allPods: usageClient.pods(node.Name),
})
nodesMap[node.Name] = node
nodesUsageMap[node.Name] = usageClient.nodeUtilization(node.Name)
podListMap[node.Name] = usageClient.pods(node.Name)
}
return nodeUsageList
return nodesMap, nodesUsageMap, podListMap
}
func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity {
@@ -156,54 +197,74 @@ func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceNam
return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat)
}
func resourceThresholdsToNodeUsage(resourceThresholds api.ResourceThresholds, node *v1.Node) api.ReferencedResourceList {
nodeUsage := make(api.ReferencedResourceList)
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
for resourceName, threshold := range resourceThresholds {
nodeUsage[resourceName] = resourceThreshold(nodeCapacity, resourceName, threshold)
}
return nodeUsage
}
func roundTo2Decimals(percentage float64) float64 {
return math.Round(percentage*100) / 100
}
func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
nodeCapacity := nodeUsage.node.Status.Capacity
if len(nodeUsage.node.Status.Allocatable) > 0 {
nodeCapacity = nodeUsage.node.Status.Allocatable
func resourceUsagePercentages(nodeUsage api.ReferencedResourceList, node *v1.Node, round bool) api.ResourceThresholds {
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
resourceUsagePercentage := map[v1.ResourceName]float64{}
for resourceName, resourceUsage := range nodeUsage.usage {
resourceUsagePercentage := api.ResourceThresholds{}
for resourceName, resourceUsage := range nodeUsage {
cap := nodeCapacity[resourceName]
if !cap.IsZero() {
resourceUsagePercentage[resourceName] = 100 * float64(resourceUsage.MilliValue()) / float64(cap.MilliValue())
resourceUsagePercentage[resourceName] = roundTo2Decimals(resourceUsagePercentage[resourceName])
value := 100 * float64(resourceUsage.MilliValue()) / float64(cap.MilliValue())
if round {
value = roundTo2Decimals(float64(value))
}
resourceUsagePercentage[resourceName] = api.Percentage(value)
}
}
return resourceUsagePercentage
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
// low and high thresholds, it is simply ignored.
func classifyNodes(
nodeUsages []NodeUsage,
nodeThresholds map[string]NodeThresholds,
lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool,
) ([]NodeInfo, []NodeInfo) {
lowNodes, highNodes := []NodeInfo{}, []NodeInfo{}
func nodeUsageToResourceThresholds(nodeUsage map[string]api.ReferencedResourceList, nodes map[string]*v1.Node) map[string]api.ResourceThresholds {
resourceThresholds := make(map[string]api.ResourceThresholds)
for nodeName, node := range nodes {
resourceThresholds[nodeName] = resourceUsagePercentages(nodeUsage[nodeName], node, false)
}
return resourceThresholds
}
for _, nodeUsage := range nodeUsages {
nodeInfo := NodeInfo{
NodeUsage: nodeUsage,
thresholds: nodeThresholds[nodeUsage.node.Name],
}
if lowThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
klog.InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
lowNodes = append(lowNodes, nodeInfo)
} else if highThresholdFilter(nodeUsage.node, nodeUsage, nodeThresholds[nodeUsage.node.Name]) {
klog.InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
highNodes = append(highNodes, nodeInfo)
} else {
klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
type classifierFnc func(nodeName string, value, threshold api.ResourceThresholds) bool
func classifyNodeUsage(
nodeUsageAsNodeThresholds map[string]api.ResourceThresholds,
nodeThresholdsMap map[string][]api.ResourceThresholds,
classifiers []classifierFnc,
) []map[string]api.ResourceThresholds {
nodeGroups := make([]map[string]api.ResourceThresholds, len(classifiers))
for i := range len(classifiers) {
nodeGroups[i] = make(map[string]api.ResourceThresholds)
}
for nodeName, nodeUsage := range nodeUsageAsNodeThresholds {
for idx, classFnc := range classifiers {
if classFnc(nodeName, nodeUsage, nodeThresholdsMap[nodeName][idx]) {
nodeGroups[idx][nodeName] = nodeUsage
break
}
}
}
return lowNodes, highNodes
return nodeGroups
}
func usageToKeysAndValues(usage api.ReferencedResourceList) []interface{} {
@@ -418,16 +479,25 @@ func isNodeAboveTargetUtilization(usage NodeUsage, threshold api.ReferencedResou
return false
}
// isNodeWithLowUtilization checks if a node is underutilized
// All resources have to be below the low threshold
func isNodeWithLowUtilization(usage NodeUsage, threshold api.ReferencedResourceList) bool {
for name, nodeValue := range usage.usage {
// usage.lowResourceThreshold[name] < nodeValue
if threshold[name].Cmp(*nodeValue) == -1 {
// isNodeAboveThreshold checks if a node is over a threshold
// At least one resource has to be above the threshold
func isNodeAboveThreshold(usage, threshold api.ResourceThresholds) bool {
for name, resourceValue := range usage {
if threshold[name] < resourceValue {
return true
}
}
return false
}
// isNodeBelowThreshold checks if a node is under a threshold
// All resources have to be below the threshold
func isNodeBelowThreshold(usage, threshold api.ResourceThresholds) bool {
for name, resourceValue := range usage {
if threshold[name] < resourceValue {
return false
}
}
return true
}
@@ -453,28 +523,3 @@ func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*
return nonRemovablePods, removablePods
}
func averageNodeBasicresources(nodes []*v1.Node, usageClient usageClient) api.ResourceThresholds {
total := api.ResourceThresholds{}
average := api.ResourceThresholds{}
numberOfNodes := len(nodes)
for _, node := range nodes {
usage := usageClient.nodeUtilization(node.Name)
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
for resource, value := range usage {
nodeCapacityValue := nodeCapacity[resource]
if resource == v1.ResourceCPU {
total[resource] += api.Percentage(value.MilliValue()) / api.Percentage(nodeCapacityValue.MilliValue()) * 100.0
} else {
total[resource] += api.Percentage(value.Value()) / api.Percentage(nodeCapacityValue.Value()) * 100.0
}
}
}
for resource, value := range total {
average[resource] = value / api.Percentage(numberOfNodes)
}
return average
}

View File

@@ -57,8 +57,13 @@ var (
)
func TestResourceUsagePercentages(t *testing.T) {
resourceUsagePercentage := resourceUsagePercentages(NodeUsage{
node: &v1.Node{
resourceUsagePercentage := resourceUsagePercentages(
api.ReferencedResourceList{
v1.ResourceCPU: resource.NewMilliQuantity(1220, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(11, resource.BinarySI),
},
&v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
@@ -72,12 +77,8 @@ func TestResourceUsagePercentages(t *testing.T) {
},
},
},
usage: api.ReferencedResourceList{
v1.ResourceCPU: resource.NewMilliQuantity(1220, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(11, resource.BinarySI),
},
})
true,
)
expectedUsageInIntPercentage := map[v1.ResourceName]float64{
v1.ResourceCPU: 63,
@@ -86,7 +87,7 @@ func TestResourceUsagePercentages(t *testing.T) {
}
for resourceName, percentage := range expectedUsageInIntPercentage {
if math.Floor(resourceUsagePercentage[resourceName]) != percentage {
if math.Floor(float64(resourceUsagePercentage[resourceName])) != percentage {
t.Errorf("Incorrect percentange computation, expected %v, got math.Floor(%v) instead", percentage, resourceUsagePercentage[resourceName])
}
}