diff --git a/go.mod b/go.mod
index 564998eb4..de23a7fcf 100644
--- a/go.mod
+++ b/go.mod
@@ -32,6 +32,8 @@ require (
 	sigs.k8s.io/yaml v1.4.0
 )
 
+require golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56
+
 require (
 	cel.dev/expr v0.18.0 // indirect
 	github.com/BurntSushi/toml v0.3.1 // indirect
@@ -98,7 +100,6 @@ require (
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect
 	golang.org/x/crypto v0.31.0 // indirect
-	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
 	golang.org/x/mod v0.21.0 // indirect
 	golang.org/x/net v0.30.0 // indirect
 	golang.org/x/oauth2 v0.23.0 // indirect
diff --git a/pkg/framework/plugins/nodeutilization/highnodeutilization.go b/pkg/framework/plugins/nodeutilization/highnodeutilization.go
index e9e412150..869ffc2ad 100644
--- a/pkg/framework/plugins/nodeutilization/highnodeutilization.go
+++ b/pkg/framework/plugins/nodeutilization/highnodeutilization.go
@@ -28,6 +28,7 @@ import (
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
 	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
 )
@@ -42,6 +43,7 @@ type HighNodeUtilization struct {
 	podFilter                func(pod *v1.Pod) bool
 	underutilizationCriteria []interface{}
 	resourceNames            []v1.ResourceName
+	extendedResourceNames    []v1.ResourceName
 	targetThresholds         api.ResourceThresholds
 	usageClient              usageClient
 }
@@ -57,7 +59,7 @@ func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (
 	targetThresholds := make(api.ResourceThresholds)
 	setDefaultForThresholds(highNodeUtilizatioArgs.Thresholds, targetThresholds)
-	resourceNames := getResourceNames(targetThresholds)
+	resourceNames := getResourceNames(highNodeUtilizatioArgs.Thresholds)
 
 	underutilizationCriteria := []interface{}{
 		"CPU", highNodeUtilizatioArgs.Thresholds[v1.ResourceCPU],
@@ -77,14 +79,19 @@ func NewHighNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (
 		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
 	}
 
+	extendedResourceNames := uniquifyResourceNames(
+		append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods),
+	)
+
 	return &HighNodeUtilization{
 		handle:                   handle,
 		args:                     highNodeUtilizatioArgs,
 		resourceNames:            resourceNames,
+		extendedResourceNames:    extendedResourceNames,
 		targetThresholds:         targetThresholds,
 		underutilizationCriteria: underutilizationCriteria,
 		podFilter:                podFilter,
-		usageClient:              newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc()),
+		usageClient:              newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc()),
 	}, nil
 }
@@ -102,11 +109,18 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 	}
 
 	nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, h.usageClient)
-	nodeThresholdsMap := getStaticNodeThresholds(nodes, h.args.Thresholds, h.targetThresholds)
-	nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
+	capacities := referencedResourceListForNodesCapacity(nodes)
+
+	usage, thresholds := assessNodesUsagesAndStaticThresholds(
+		nodesUsageMap,
+		capacities,
+		h.args.Thresholds,
+		h.targetThresholds,
+	)
+
 	nodeGroups := classifyNodeUsage(
-		nodesUsageAsNodeThresholdsMap,
-		nodeThresholdsMap,
+		usage,
+		thresholds,
 		[]classifierFnc{
 			// underutilized nodes
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
@@ -128,7 +142,12 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 	category := []string{"underutilized", "overutilized"}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
-			klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				fmt.Sprintf("Node is %s", category[i]),
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 			nodeInfos[i] = append(nodeInfos[i], NodeInfo{
 				NodeUsage: NodeUsage{
 					node:    nodesMap[nodeName],
@@ -136,8 +155,8 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 					allPods: podListMap[nodeName],
 				},
 				thresholds: NodeThresholds{
-					lowResourceThreshold:  resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
-					highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
+					lowResourceThreshold:  resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], h.extendedResourceNames),
+					highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], h.extendedResourceNames),
 				},
 			})
 		}
@@ -189,7 +208,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 		h.handle.Evictor(),
 		evictions.EvictOptions{StrategyName: HighNodeUtilizationPluginName},
 		h.podFilter,
-		h.resourceNames,
+		h.extendedResourceNames,
 		continueEvictionCond,
 		h.usageClient,
 		nil,
@@ -199,22 +218,16 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 }
 
 func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
-	// check if Pods/CPU/Mem are set, if not, set them to 100
-	if _, ok := thresholds[v1.ResourcePods]; !ok {
-		thresholds[v1.ResourcePods] = MaxResourcePercentage
+	if _, ok := thresholds[v1.ResourcePods]; ok {
+		targetThresholds[v1.ResourcePods] = MaxResourcePercentage
 	}
-	if _, ok := thresholds[v1.ResourceCPU]; !ok {
-		thresholds[v1.ResourceCPU] = MaxResourcePercentage
+	if _, ok := thresholds[v1.ResourceCPU]; ok {
+		targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
 	}
-	if _, ok := thresholds[v1.ResourceMemory]; !ok {
-		thresholds[v1.ResourceMemory] = MaxResourcePercentage
+	if _, ok := thresholds[v1.ResourceMemory]; ok {
+		targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
 	}
-	// Default targetThreshold resource values to 100
-	targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-	targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-	targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-
 	for name := range thresholds {
 		if !nodeutil.IsBasicResource(name) {
 			targetThresholds[name] = MaxResourcePercentage
diff --git a/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go
index 27a8f26a2..f436f4516 100644
--- a/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go
+++ b/pkg/framework/plugins/nodeutilization/highnodeutilization_test.go
@@ -244,7 +244,7 @@ func TestHighNodeUtilization(t *testing.T) {
 			},
 			// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
 			pods: []*v1.Pod{
-				test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
+				test.BuildTestPod("p1", 0, 0, n1NodeName, func(pod *v1.Pod) {
 					test.SetRSOwnerRef(pod)
 					test.MakeBestEffortPod(pod)
 				}),
diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go
index 970aea3ca..16c971fe2 100644
--- a/pkg/framework/plugins/nodeutilization/lownodeutilization.go
+++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go
@@ -28,6 +28,7 @@ import (
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
 	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
 )
@@ -43,6 +44,7 @@ type LowNodeUtilization struct {
 	underutilizationCriteria []interface{}
 	overutilizationCriteria  []interface{}
 	resourceNames            []v1.ResourceName
+	extendedResourceNames    []v1.ResourceName
 	usageClient              usageClient
 }
@@ -55,6 +57,9 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		return nil, fmt.Errorf("want args to be of type LowNodeUtilizationArgs, got %T", args)
 	}
 
+	resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
+	extendedResourceNames := resourceNames
+
 	metricsUtilization := lowNodeUtilizationArgsArgs.MetricsUtilization
 	if metricsUtilization != nil && metricsUtilization.Source == api.PrometheusMetrics {
 		if metricsUtilization.Prometheus != nil && metricsUtilization.Prometheus.Query != "" {
@@ -70,7 +75,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 			return nil, fmt.Errorf("prometheus query is missing")
 		}
 	} else {
-		setDefaultForLNUThresholds(lowNodeUtilizationArgsArgs.Thresholds, lowNodeUtilizationArgsArgs.TargetThresholds, lowNodeUtilizationArgsArgs.UseDeviationThresholds)
+		extendedResourceNames = uniquifyResourceNames(append(resourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods))
 	}
 
 	underutilizationCriteria := []interface{}{
@@ -102,8 +107,6 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
 	}
 
-	resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
-
 	var usageClient usageClient
 	// MetricsServer is deprecated, removed once dropped
 	if metricsUtilization != nil {
@@ -112,7 +115,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 			if handle.MetricsCollector() == nil {
 				return nil, fmt.Errorf("metrics client not initialized")
 			}
-			usageClient = newActualUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
+			usageClient = newActualUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
 		case metricsUtilization.Source == api.PrometheusMetrics:
 			if handle.PrometheusClient() == nil {
 				return nil, fmt.Errorf("prometheus client not initialized")
@@ -124,7 +127,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 			return nil, fmt.Errorf("metrics source is empty")
 		}
 	} else {
-		usageClient = newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc())
+		usageClient = newRequestedUsageClient(extendedResourceNames, handle.GetPodsAssignedToNodeFunc())
 	}
 
 	return &LowNodeUtilization{
@@ -133,6 +136,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
 		underutilizationCriteria: underutilizationCriteria,
 		overutilizationCriteria:  overutilizationCriteria,
 		resourceNames:            resourceNames,
+		extendedResourceNames:    extendedResourceNames,
 		podFilter:                podFilter,
 		usageClient:              usageClient,
 	}, nil
@@ -152,25 +156,29 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	}
 
 	nodesMap, nodesUsageMap, podListMap := getNodeUsageSnapshot(nodes, l.usageClient)
-	var nodeThresholdsMap map[string][]api.ResourceThresholds
+	capacities := referencedResourceListForNodesCapacity(nodes)
+
+	var usage map[string]api.ResourceThresholds
+	var thresholds map[string][]api.ResourceThresholds
 	if l.args.UseDeviationThresholds {
-		thresholds, average := getNodeThresholdsFromAverageNodeUsage(nodes, l.usageClient, l.args.Thresholds, l.args.TargetThresholds)
-		klog.InfoS("Average utilization through all nodes", "utilization", average)
-		// All nodes are expected to have the same thresholds
-		for nodeName := range thresholds {
-			klog.InfoS("Underutilization threshold based on average utilization", "threshold", thresholds[nodeName][0])
-			klog.InfoS("Overutilization threshold based on average utilization", "threshold", thresholds[nodeName][1])
-			break
-		}
-		nodeThresholdsMap = thresholds
+		usage, thresholds = assessNodesUsagesAndRelativeThresholds(
+			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			capacities,
+			l.args.Thresholds,
+			l.args.TargetThresholds,
+		)
 	} else {
-		nodeThresholdsMap = getStaticNodeThresholds(nodes, l.args.Thresholds, l.args.TargetThresholds)
+		usage, thresholds = assessNodesUsagesAndStaticThresholds(
+			filterResourceNamesFromNodeUsage(nodesUsageMap, l.resourceNames),
+			capacities,
+			l.args.Thresholds,
+			l.args.TargetThresholds,
+		)
 	}
-	nodesUsageAsNodeThresholdsMap := nodeUsageToResourceThresholds(nodesUsageMap, nodesMap)
 
 	nodeGroups := classifyNodeUsage(
-		nodesUsageAsNodeThresholdsMap,
-		nodeThresholdsMap,
+		usage,
+		thresholds,
 		[]classifierFnc{
 			// underutilization
 			func(nodeName string, usage, threshold api.ResourceThresholds) bool {
@@ -193,7 +201,12 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	listedNodes := map[string]struct{}{}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
-			klog.InfoS("Node is "+category[i], "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				fmt.Sprintf("Node is %s", category[i]),
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 			listedNodes[nodeName] = struct{}{}
 			nodeInfos[i] = append(nodeInfos[i], NodeInfo{
 				NodeUsage: NodeUsage{
@@ -202,15 +215,21 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 					allPods: podListMap[nodeName],
 				},
 				thresholds: NodeThresholds{
-					lowResourceThreshold:  resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][0], nodesMap[nodeName]),
-					highResourceThreshold: resourceThresholdsToNodeUsage(nodeThresholdsMap[nodeName][1], nodesMap[nodeName]),
+					lowResourceThreshold:  resourceThresholdsToNodeUsage(thresholds[nodeName][0], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
+					highResourceThreshold: resourceThresholdsToNodeUsage(thresholds[nodeName][1], capacities[nodeName], append(l.extendedResourceNames, v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods)),
 				},
 			})
 		}
 	}
+
 	for nodeName := range nodesMap {
 		if _, ok := listedNodes[nodeName]; !ok {
-			klog.InfoS("Node is appropriately utilized", "node", klog.KObj(nodesMap[nodeName]), "usage", nodesUsageMap[nodeName], "usagePercentage", resourceUsagePercentages(nodesUsageMap[nodeName], nodesMap[nodeName], true))
+			klog.InfoS(
+				"Node is appropriately utilized",
+				"node", klog.KObj(nodesMap[nodeName]),
+				"usage", nodesUsageMap[nodeName],
+				"usagePercentage", normalizer.Round(usage[nodeName]),
+			)
 		}
 	}
 
@@ -275,7 +294,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 		l.handle.Evictor(),
 		evictions.EvictOptions{StrategyName: LowNodeUtilizationPluginName},
 		l.podFilter,
-		l.resourceNames,
+		l.extendedResourceNames,
 		continueEvictionCond,
 		l.usageClient,
 		nodeLimit,
@@ -283,34 +302,3 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 
 	return nil
 }
-
-func setDefaultForLNUThresholds(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) {
-	// check if Pods/CPU/Mem are set, if not, set them to 100
-	if _, ok := thresholds[v1.ResourcePods]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourcePods] = MinResourcePercentage
-			targetThresholds[v1.ResourcePods] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourcePods] = MaxResourcePercentage
-			targetThresholds[v1.ResourcePods] = MaxResourcePercentage
-		}
-	}
-	if _, ok := thresholds[v1.ResourceCPU]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourceCPU] = MinResourcePercentage
-			targetThresholds[v1.ResourceCPU] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourceCPU] = MaxResourcePercentage
-			targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
-		}
-	}
-	if _, ok := thresholds[v1.ResourceMemory]; !ok {
-		if useDeviationThresholds {
-			thresholds[v1.ResourceMemory] = MinResourcePercentage
-			targetThresholds[v1.ResourceMemory] = MinResourcePercentage
-		} else {
-			thresholds[v1.ResourceMemory] = MaxResourcePercentage
-			targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
-		}
-	}
-}
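Note: the relative ("deviation") thresholds used above reduce to simple arithmetic: take the mean usage percentage across all nodes, subtract the low span and add the high span, then clamp into [0, 100]. A minimal, self-contained sketch of that logic (plain float64 percentages stand in for api.ResourceThresholds; all names here are illustrative, not part of this change):

package main

import "fmt"

// clamp keeps a percentage within [0, 100].
func clamp(v float64) float64 {
	if v < 0 {
		return 0
	}
	if v > 100 {
		return 100
	}
	return v
}

func main() {
	// Usage percentages of a single resource on three nodes.
	usages := []float64{90, 40, 0}

	var sum float64
	for _, u := range usages {
		sum += u
	}
	average := sum / float64(len(usages)) // 43.33

	lowSpan, highSpan := 5.0, 5.0
	low := clamp(average - lowSpan)   // nodes below this are underutilized
	high := clamp(average + highSpan) // nodes above this are overutilized

	fmt.Printf("low=%.2f high=%.2f\n", low, high) // low=38.33 high=48.33
}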
diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go
index 16a609a38..dcf6d60f4 100644
--- a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go
+++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go
@@ -1022,6 +1022,79 @@ func TestLowNodeUtilization(t *testing.T) {
 			expectedPodsWithMetricsEvicted: 2,
 			evictedPods:                    []string{},
 		},
+		{
+			name: "deviation thresholds and overevicting memory",
+			thresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  5,
+				v1.ResourcePods: 5,
+			},
+			targetThresholds: api.ResourceThresholds{
+				v1.ResourceCPU:  5,
+				v1.ResourcePods: 5,
+			},
+			useDeviationThresholds: true,
+			nodes: []*v1.Node{
+				test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
+				test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
+				test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
+			},
+			// totalcpuusage = 3600m, avgcpuusage = 3600/12000 = 0.3 => 30%
+			// totalpodsusage = 9, avgpodsusage = 9/30 = 0.3 => 30%
+			// n1 and n2 are fully memory utilized
+			pods: []*v1.Pod{
+				test.BuildTestPod("p1", 400, 375, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p2", 400, 375, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p3", 400, 375, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p4", 400, 375, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p5", 400, 375, n1NodeName, test.SetRSOwnerRef),
+				// These won't be evicted.
+				test.BuildTestPod("p6", 400, 375, n1NodeName, test.SetDSOwnerRef),
+				test.BuildTestPod("p7", 400, 375, n1NodeName, func(pod *v1.Pod) {
+					// A pod with local storage.
+					test.SetNormalOwnerRef(pod)
+					pod.Spec.Volumes = []v1.Volume{
+						{
+							Name: "sample",
+							VolumeSource: v1.VolumeSource{
+								HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+								EmptyDir: &v1.EmptyDirVolumeSource{
+									SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
+								},
+							},
+						},
+					}
+					// A Mirror Pod.
+					pod.Annotations = test.GetMirrorPodAnnotation()
+				}),
+				test.BuildTestPod("p8", 400, 375, n1NodeName, func(pod *v1.Pod) {
+					// A Critical Pod.
+					test.SetNormalOwnerRef(pod)
+					pod.Namespace = "kube-system"
+					priority := utils.SystemCriticalPriority
+					pod.Spec.Priority = &priority
+				}),
+				test.BuildTestPod("p9", 400, 3000, n2NodeName, test.SetRSOwnerRef),
+			},
+			nodemetricses: []*v1beta1.NodeMetrics{
+				test.BuildNodeMetrics(n1NodeName, 4000, 3000),
+				test.BuildNodeMetrics(n2NodeName, 4000, 3000),
+				test.BuildNodeMetrics(n3NodeName, 4000, 3000),
+			},
+			podmetricses: []*v1beta1.PodMetrics{
+				test.BuildPodMetrics("p1", 400, 375),
+				test.BuildPodMetrics("p2", 400, 375),
+				test.BuildPodMetrics("p3", 400, 375),
+				test.BuildPodMetrics("p4", 400, 375),
+				test.BuildPodMetrics("p5", 400, 375),
+				test.BuildPodMetrics("p6", 400, 375),
+				test.BuildPodMetrics("p7", 400, 375),
+				test.BuildPodMetrics("p8", 400, 375),
+				test.BuildPodMetrics("p9", 400, 3000),
+			},
+			expectedPodsEvicted:            0,
+			expectedPodsWithMetricsEvicted: 0,
+			evictedPods:                    []string{},
+		},
 		{
 			name: "without priorities different evictions for requested and actual resources",
 			thresholds: api.ResourceThresholds{
@@ -1612,6 +1685,43 @@ func TestLowNodeUtilizationWithPrometheusMetrics(t *testing.T) {
 			},
 			expectedPodsEvicted: 2,
 		},
+		{
+			name: "with instance:node_cpu:rate:sum query and deviation thresholds",
+			args: &LowNodeUtilizationArgs{
+				UseDeviationThresholds: true,
+				Thresholds:             api.ResourceThresholds{MetricResource: 10},
+				TargetThresholds:       api.ResourceThresholds{MetricResource: 10},
+				MetricsUtilization: &MetricsUtilization{
+					Source: api.PrometheusMetrics,
+					Prometheus: &Prometheus{
+						Query: "instance:node_cpu:rate:sum",
+					},
+				},
+			},
+			samples: model.Vector{
+				sample("instance:node_cpu:rate:sum", n1NodeName, 1),
+				sample("instance:node_cpu:rate:sum", n2NodeName, 0.5),
+				sample("instance:node_cpu:rate:sum", n3NodeName, 0),
+			},
+			nodes: []*v1.Node{
+				test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
+				test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
+				test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
+			},
+			pods: []*v1.Pod{
+				test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
+				test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
+				// These won't be evicted.
+				test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
+				test.BuildTestPod("p7", 400, 0, n1NodeName, withLocalStorage),
+				test.BuildTestPod("p8", 400, 0, n1NodeName, withCriticalPod),
+				test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
+			},
+			expectedPodsEvicted: 1,
+		},
 	}
 
 	for _, tc := range testCases {
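Note: the expectations in the new deviation test can be checked by hand from the comment in the test case: eight pods on n1 and one on n2, each requesting 400m CPU, against three 4000m nodes. A tiny sketch of that arithmetic (illustrative only):

package main

import "fmt"

func main() {
	// Mirrors the "deviation thresholds and overevicting memory" case:
	// 9 pods requesting 400m CPU each, across 3 nodes of 4000m capacity.
	totalCPU, capacityCPU := 9*400.0, 3*4000.0
	avgCPU := totalCPU / capacityCPU * 100 // 30%

	span := 5.0
	fmt.Printf("cpu: avg=%v%% low=%v%% high=%v%%\n", avgCPU, avgCPU-span, avgCPU+span)
	// cpu: avg=30% low=25% high=35%
}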
+ test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, withLocalStorage), + test.BuildTestPod("p8", 400, 0, n1NodeName, withCriticalPod), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + expectedPodsEvicted: 1, + }, } for _, tc := range testCases { diff --git a/pkg/framework/plugins/nodeutilization/nodeutilization.go b/pkg/framework/plugins/nodeutilization/nodeutilization.go index c0d589688..6a9a5af1b 100644 --- a/pkg/framework/plugins/nodeutilization/nodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/nodeutilization.go @@ -18,7 +18,7 @@ package nodeutilization import ( "context" - "math" + "maps" "slices" "sort" @@ -28,9 +28,11 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" + "k8s.io/utils/ptr" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer" frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" "sigs.k8s.io/descheduler/pkg/utils" ) @@ -85,84 +87,6 @@ const ( MaxResourcePercentage = 100 ) -func normalizePercentage(percent api.Percentage) api.Percentage { - if percent > MaxResourcePercentage { - return MaxResourcePercentage - } - if percent < MinResourcePercentage { - return MinResourcePercentage - } - return percent -} - -func nodeCapacity(node *v1.Node, nodeUsage api.ReferencedResourceList) v1.ResourceList { - capacity := node.Status.Capacity - if len(node.Status.Allocatable) > 0 { - capacity = node.Status.Allocatable - } - // the usage captures the metrics resource - if _, ok := nodeUsage[MetricResource]; ok { - // Make ResourceMetrics 100% => 100 points - capacity[MetricResource] = *resource.NewQuantity(int64(100), resource.DecimalSI) - } - return capacity -} - -func getNodeThresholdsFromAverageNodeUsage( - nodes []*v1.Node, - usageClient usageClient, - lowSpan, highSpan api.ResourceThresholds, -) (map[string][]api.ResourceThresholds, api.ResourceThresholds) { - total := api.ResourceThresholds{} - average := api.ResourceThresholds{} - numberOfNodes := len(nodes) - for _, node := range nodes { - usage := usageClient.nodeUtilization(node.Name) - nodeCapacity := nodeCapacity(node, usage) - for resource, value := range usage { - nodeCapacityValue := nodeCapacity[resource] - if resource == v1.ResourceCPU { - total[resource] += api.Percentage(value.MilliValue()) / api.Percentage(nodeCapacityValue.MilliValue()) * 100.0 - } else { - total[resource] += api.Percentage(value.Value()) / api.Percentage(nodeCapacityValue.Value()) * 100.0 - } - } - } - lowThreshold, highThreshold := api.ResourceThresholds{}, api.ResourceThresholds{} - for resource, value := range total { - average[resource] = value / api.Percentage(numberOfNodes) - // If either of the spans are 0, ignore the resource. I.e. 0%:5% is invalid. - // Any zero span signifies a resource is either not set or is to be ignored. 
- if lowSpan[resource] == MinResourcePercentage || highSpan[resource] == MinResourcePercentage { - lowThreshold[resource] = 1 - highThreshold[resource] = 1 - } else { - lowThreshold[resource] = normalizePercentage(average[resource] - lowSpan[resource]) - highThreshold[resource] = normalizePercentage(average[resource] + highSpan[resource]) - } - } - - nodeThresholds := make(map[string][]api.ResourceThresholds) - for _, node := range nodes { - nodeThresholds[node.Name] = []api.ResourceThresholds{ - lowThreshold, - highThreshold, - } - } - return nodeThresholds, average -} - -func getStaticNodeThresholds( - nodes []*v1.Node, - thresholdsList ...api.ResourceThresholds, -) map[string][]api.ResourceThresholds { - nodeThresholds := make(map[string][]api.ResourceThresholds) - for _, node := range nodes { - nodeThresholds[node.Name] = append([]api.ResourceThresholds{}, slices.Clone(thresholdsList)...) - } - return nodeThresholds -} - // getNodeUsageSnapshot separates the snapshot into easily accesible // data chunks so the node usage can be processed separately. func getNodeUsageSnapshot( @@ -187,7 +111,7 @@ func getNodeUsageSnapshot( return nodesMap, nodesUsageMap, podListMap } -func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity { +func resourceThreshold(nodeCapacity api.ReferencedResourceList, resourceName v1.ResourceName, threshold api.Percentage) *resource.Quantity { defaultFormat := resource.DecimalSI if resourceName == v1.ResourceMemory { defaultFormat = resource.BinarySI @@ -200,7 +124,10 @@ func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceNam return int64(float64(threshold) * 0.01 * float64(resourceNodeCapacity)) } - resourceCapacityQuantity := nodeCapacity.Name(resourceName, defaultFormat) + resourceCapacityQuantity := &resource.Quantity{Format: defaultFormat} + if _, ok := nodeCapacity[resourceName]; ok { + resourceCapacityQuantity = nodeCapacity[resourceName] + } if resourceName == v1.ResourceCPU { return resource.NewMilliQuantity(resourceCapacityFraction(resourceCapacityQuantity.MilliValue()), defaultFormat) @@ -208,47 +135,17 @@ func resourceThreshold(nodeCapacity v1.ResourceList, resourceName v1.ResourceNam return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), defaultFormat) } -func resourceThresholdsToNodeUsage(resourceThresholds api.ResourceThresholds, node *v1.Node) api.ReferencedResourceList { +func resourceThresholdsToNodeUsage(resourceThresholds api.ResourceThresholds, capacity api.ReferencedResourceList, resourceNames []v1.ResourceName) api.ReferencedResourceList { nodeUsage := make(api.ReferencedResourceList) - - nodeCapacity := node.Status.Capacity - if len(node.Status.Allocatable) > 0 { - nodeCapacity = node.Status.Allocatable - } for resourceName, threshold := range resourceThresholds { - nodeUsage[resourceName] = resourceThreshold(nodeCapacity, resourceName, threshold) + nodeUsage[resourceName] = resourceThreshold(capacity, resourceName, threshold) } - - return nodeUsage -} - -func roundTo2Decimals(percentage float64) float64 { - return math.Round(percentage*100) / 100 -} - -func resourceUsagePercentages(nodeUsage api.ReferencedResourceList, node *v1.Node, round bool) api.ResourceThresholds { - nodeCapacity := nodeCapacity(node, nodeUsage) - - resourceUsagePercentage := api.ResourceThresholds{} - for resourceName, resourceUsage := range nodeUsage { - cap := nodeCapacity[resourceName] - if !cap.IsZero() { - value := 100 * 
float64(resourceUsage.MilliValue()) / float64(cap.MilliValue()) - if round { - value = roundTo2Decimals(float64(value)) - } - resourceUsagePercentage[resourceName] = api.Percentage(value) + for _, resourceName := range resourceNames { + if _, exists := nodeUsage[resourceName]; !exists { + nodeUsage[resourceName] = capacity[resourceName] } } - return resourceUsagePercentage -} - -func nodeUsageToResourceThresholds(nodeUsage map[string]api.ReferencedResourceList, nodes map[string]*v1.Node) map[string]api.ResourceThresholds { - resourceThresholds := make(map[string]api.ResourceThresholds) - for nodeName, node := range nodes { - resourceThresholds[nodeName] = resourceUsagePercentages(nodeUsage[nodeName], node, false) - } - return resourceThresholds + return nodeUsage } type classifierFnc func(nodeName string, value, threshold api.ResourceThresholds) bool @@ -503,8 +400,8 @@ func isNodeAboveTargetUtilization(usage NodeUsage, threshold api.ReferencedResou // isNodeAboveThreshold checks if a node is over a threshold // At least one resource has to be above the threshold func isNodeAboveThreshold(usage, threshold api.ResourceThresholds) bool { - for name, resourceValue := range usage { - if threshold[name] < resourceValue { + for name := range threshold { + if threshold[name] < usage[name] { return true } } @@ -514,8 +411,8 @@ func isNodeAboveThreshold(usage, threshold api.ResourceThresholds) bool { // isNodeBelowThreshold checks if a node is under a threshold // All resources have to be below the threshold func isNodeBelowThreshold(usage, threshold api.ResourceThresholds) bool { - for name, resourceValue := range usage { - if threshold[name] < resourceValue { + for name := range threshold { + if threshold[name] < usage[name] { return false } } @@ -544,3 +441,155 @@ func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []* return nonRemovablePods, removablePods } + +// assessNodesUsagesAndStaticThresholds converts the raw usage data into +// percentage. Returns the usage (pct) and the thresholds (pct) for each +// node. +func assessNodesUsagesAndStaticThresholds( + rawUsages, rawCapacities map[string]api.ReferencedResourceList, + lowSpan, highSpan api.ResourceThresholds, +) (map[string]api.ResourceThresholds, map[string][]api.ResourceThresholds) { + // first we normalize the node usage from the raw data (Mi, Gi, etc) + // into api.Percentage values. + usage := normalizer.Normalize( + rawUsages, rawCapacities, ResourceUsageToResourceThreshold, + ) + + // we are not taking the average and applying deviations to it we can + // simply replicate the same threshold across all nodes and return. + thresholds := normalizer.Replicate( + slices.Collect(maps.Keys(usage)), + []api.ResourceThresholds{lowSpan, highSpan}, + ) + return usage, thresholds +} + +// assessNodesUsagesAndRelativeThresholds converts the raw usage data into +// percentage. Thresholds are calculated based on the average usage. Returns +// the usage (pct) and the thresholds (pct) for each node. +func assessNodesUsagesAndRelativeThresholds( + rawUsages, rawCapacities map[string]api.ReferencedResourceList, + lowSpan, highSpan api.ResourceThresholds, +) (map[string]api.ResourceThresholds, map[string][]api.ResourceThresholds) { + // first we normalize the node usage from the raw data (Mi, Gi, etc) + // into api.Percentage values. 
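Note: the two assess* helpers above are thin compositions of the normalizer package introduced later in this change. A small standalone sketch of the same two-step shape (normalize raw values into percentages, then replicate static thresholds to every node), using plain float64 values instead of the descheduler's api types:

package main

import (
	"fmt"

	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
)

func main() {
	// Raw per-node usage and capacity for a single metric.
	usages := map[string]float64{"node1": 1000, "node2": 250}
	totals := map[string]float64{"node1": 2000, "node2": 1000}

	// Step 1: normalize raw values into percentages, node by node.
	pct := normalizer.Normalize(usages, totals, func(u, t float64) float64 {
		return u / t * 100
	})
	fmt.Println(pct) // map[node1:50 node2:25]

	// Step 2: static thresholds are simply replicated to every node.
	thresholds := normalizer.Replicate([]string{"node1", "node2"}, [2]float64{20, 70})
	fmt.Println(thresholds) // map[node1:[20 70] node2:[20 70]]
}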
diff --git a/pkg/framework/plugins/nodeutilization/nodeutilization_test.go b/pkg/framework/plugins/nodeutilization/nodeutilization_test.go
index 1a6141473..3404ada0e 100644
--- a/pkg/framework/plugins/nodeutilization/nodeutilization_test.go
+++ b/pkg/framework/plugins/nodeutilization/nodeutilization_test.go
@@ -17,7 +17,7 @@ limitations under the License.
 package nodeutilization
 
 import (
-	"math"
+	"reflect"
 	"testing"
 
 	v1 "k8s.io/api/core/v1"
@@ -56,45 +56,6 @@ var (
 	extendedResource = v1.ResourceName("example.com/foo")
 )
 
-func TestResourceUsagePercentages(t *testing.T) {
-	resourceUsagePercentage := resourceUsagePercentages(
-		api.ReferencedResourceList{
-			v1.ResourceCPU:    resource.NewMilliQuantity(1220, resource.DecimalSI),
-			v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
-			v1.ResourcePods:   resource.NewQuantity(11, resource.BinarySI),
-		},
-		&v1.Node{
-			Status: v1.NodeStatus{
-				Capacity: v1.ResourceList{
-					v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
-					v1.ResourceMemory: *resource.NewQuantity(3977868*1024, resource.BinarySI),
-					v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
-				},
-				Allocatable: v1.ResourceList{
-					v1.ResourceCPU:    *resource.NewMilliQuantity(1930, resource.DecimalSI),
-					v1.ResourceMemory: *resource.NewQuantity(3287692*1024, resource.BinarySI),
-					v1.ResourcePods:   *resource.NewQuantity(29, resource.BinarySI),
-				},
-			},
-		},
-		true,
-	)
-
-	expectedUsageInIntPercentage := map[v1.ResourceName]float64{
-		v1.ResourceCPU:    63,
-		v1.ResourceMemory: 90,
-		v1.ResourcePods:   37,
-	}
-
-	for resourceName, percentage := range expectedUsageInIntPercentage {
-		if math.Floor(float64(resourceUsagePercentage[resourceName])) != percentage {
-			t.Errorf("Incorrect percentange computation, expected %v, got math.Floor(%v) instead", percentage, resourceUsagePercentage[resourceName])
-		}
-	}
-
-	t.Logf("resourceUsagePercentage: %#v\n", resourceUsagePercentage)
-}
-
 func TestSortNodesByUsage(t *testing.T) {
 	tests := []struct {
 		name string
@@ -173,3 +134,83 @@ func TestSortNodesByUsage(t *testing.T) {
 		})
 	}
 }
+
+func TestResourceUsageToResourceThreshold(t *testing.T) {
+	for _, tt := range []struct {
+		name     string
+		usage    api.ReferencedResourceList
+		capacity api.ReferencedResourceList
+		expected api.ResourceThresholds
+	}{
+		{
+			name: "10 percent",
+			usage: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(100, resource.DecimalSI),
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(1000, resource.DecimalSI),
+			},
+			expected: api.ResourceThresholds{v1.ResourceCPU: 10},
+		},
+		{
+			name: "zeroed out capacity",
+			usage: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(100, resource.DecimalSI),
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
+			},
+			expected: api.ResourceThresholds{v1.ResourceCPU: 0},
+		},
+		{
+			name: "non existing usage",
+			usage: api.ReferencedResourceList{
+				"does-not-exist": resource.NewMilliQuantity(100, resource.DecimalSI),
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU:    resource.NewMilliQuantity(100, resource.DecimalSI),
+				v1.ResourceMemory: resource.NewMilliQuantity(100, resource.DecimalSI),
+			},
+			expected: api.ResourceThresholds{},
+		},
+		{
+			name: "existing and non existing usage",
+			usage: api.ReferencedResourceList{
+				"does-not-exist": resource.NewMilliQuantity(100, resource.DecimalSI),
+				v1.ResourceCPU:   resource.NewMilliQuantity(200, resource.DecimalSI),
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU:    resource.NewMilliQuantity(1000, resource.DecimalSI),
+				v1.ResourceMemory: resource.NewMilliQuantity(1000, resource.DecimalSI),
+			},
+			expected: api.ResourceThresholds{v1.ResourceCPU: 20},
+		},
+		{
+			name: "nil usage",
+			usage: api.ReferencedResourceList{
+				v1.ResourceCPU: nil,
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(1000, resource.DecimalSI),
+			},
+			expected: api.ResourceThresholds{},
+		},
+		{
+			name: "nil capacity",
+			usage: api.ReferencedResourceList{
+				v1.ResourceCPU: resource.NewMilliQuantity(100, resource.DecimalSI),
+			},
+			capacity: api.ReferencedResourceList{
+				v1.ResourceCPU: nil,
+			},
+			expected: api.ResourceThresholds{},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ResourceUsageToResourceThreshold(tt.usage, tt.capacity)
+			if !reflect.DeepEqual(result, tt.expected) {
+				t.Errorf("Expected %v, got %v", tt.expected, result)
+			}
+		})
+	}
+}
diff --git a/pkg/framework/plugins/nodeutilization/normalizer/normalizer.go b/pkg/framework/plugins/nodeutilization/normalizer/normalizer.go
new file mode 100644
index 000000000..294a6cd39
--- /dev/null
+++ b/pkg/framework/plugins/nodeutilization/normalizer/normalizer.go
@@ -0,0 +1,142 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package normalizer
+
+import (
+	"math"
+
+	"golang.org/x/exp/constraints"
+)
+
+// Normalizer is a function that receives two values of the same type and
+// returns an object of a different type. A usage case can be a function
+// that converts a memory usage from mb to % (the first argument would be
+// the memory usage in mb and the second argument would be the total memory
+// available in mb).
+type Normalizer[V, N any] func(V, V) N
+
+// Values is a map of values indexed by a comparable key. An example of this
+// can be a list of resources indexed by a node name.
+type Values[K comparable, V any] map[K]V
+
+// Number is an interface that represents a number. Represents things we
+// can do math operations on.
+type Number interface {
+	constraints.Integer | constraints.Float
+}
+
+// Normalize uses a Normalizer function to normalize a set of values. For
+// example one may want to convert a set of memory usages from mb to %.
+// This function receives a set of usages, a set of totals, and a Normalizer
+// function. The function will return a map with the normalized values.
+func Normalize[K comparable, V, N any](usages, totals Values[K, V], fn Normalizer[V, N]) map[K]N {
+	result := Values[K, N]{}
+	for key, value := range usages {
+		total, ok := totals[key]
+		if !ok {
+			continue
+		}
+		result[key] = fn(value, total)
+	}
+	return result
+}
+
+// Replicate replicates the provided value for each key in the provided slice.
+// Returns a map with the keys and the provided value.
+func Replicate[K comparable, V any](keys []K, value V) map[K]V {
+	result := map[K]V{}
+	for _, key := range keys {
+		result[key] = value
+	}
+	return result
+}
+
+// Clamp imposes minimum and maximum limits on a set of values. The function
+// will return a set of values where each value is between the minimum and
+// maximum values (included). Values below the minimum are rounded up to the
+// minimum value, and values above the maximum are rounded down to the
+// maximum value.
+func Clamp[K comparable, N Number, V ~map[K]N](values V, minimum, maximum N) V {
+	result := V{}
+	for key := range values {
+		value := values[key]
+		value = N(math.Max(float64(value), float64(minimum)))
+		value = N(math.Min(float64(value), float64(maximum)))
+		result[key] = value
+	}
+	return result
+}
+
+// Map applies a function to each element of a slice of maps. Returns a new
+// slice with the results of applying the function to each element.
+func Map[K comparable, N Number, V ~map[K]N](items []V, fn func(V) V) []V {
+	result := []V{}
+	for _, item := range items {
+		result = append(result, fn(item))
+	}
+	return result
+}
+
+// Negate converts the values of a map to their negated values.
+func Negate[K comparable, N Number, V ~map[K]N](values V) V {
+	result := V{}
+	for key, value := range values {
+		result[key] = -value
+	}
+	return result
+}
+
+// Round rounds the values of a map to the nearest integer. Calls math.Round on
+// each value of the map.
+func Round[K comparable, N Number, V ~map[K]N](values V) V {
+	result := V{}
+	for key, value := range values {
+		result[key] = N(math.Round(float64(value)))
+	}
+	return result
+}
+
+// Sum sums up the values of two maps. Values are expected to be of Number
+// type. Original values are preserved. Keys present only in the second map
+// are ignored; keys present only in the first map are kept unchanged.
+func Sum[K comparable, N Number, V ~map[K]N](mapA, mapB V) V {
+	result := V{}
+	for name, value := range mapA {
+		result[name] = value + mapB[name]
+	}
+	return result
+}
+
+// Average calculates the average of a set of values. This function receives
+// a map of values and returns the average of all the values. Average expects
+// the values to represent the same unit of measure. You can use this function
+// after Normalizing the values.
+func Average[J, K comparable, N Number, V ~map[J]N](values map[K]V) V {
+	counter := map[J]int{}
+	result := V{}
+	for _, imap := range values {
+		for name, value := range imap {
+			result[name] += value
+			counter[name]++
+		}
+	}
+
+	for name := range result {
+		result[name] /= N(counter[name])
+	}
+
+	return result
+}
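Note: the deviation-threshold pipeline in lownodeutilization.go is just a composition of these primitives. A short usage sketch (illustrative values, assuming the package above):

package main

import (
	"fmt"

	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
)

func main() {
	// Per-node usage percentages for a single resource ("cpu").
	usage := map[string]map[string]float64{
		"node1": {"cpu": 20},
		"node2": {"cpu": 40},
	}

	average := normalizer.Average(usage) // map[cpu:30]

	// Deviate the average by a 5% span in both directions, then clamp.
	span := map[string]float64{"cpu": 5}
	low := normalizer.Clamp(normalizer.Sum(average, normalizer.Negate(span)), 0, 100)
	high := normalizer.Clamp(normalizer.Sum(average, span), 0, 100)

	fmt.Println(low, high) // map[cpu:25] map[cpu:35]
}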
diff --git a/pkg/framework/plugins/nodeutilization/normalizer/normalizer_test.go b/pkg/framework/plugins/nodeutilization/normalizer/normalizer_test.go
new file mode 100644
index 000000000..7af17b6dc
--- /dev/null
+++ b/pkg/framework/plugins/nodeutilization/normalizer/normalizer_test.go
@@ -0,0 +1,649 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package normalizer
+
+import (
+	"fmt"
+	"math"
+	"reflect"
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"sigs.k8s.io/descheduler/pkg/api"
+)
+
+func ResourceListUsageNormalizer(usages, totals v1.ResourceList) api.ResourceThresholds {
+	result := api.ResourceThresholds{}
+	for rname, value := range usages {
+		total, ok := totals[rname]
+		if !ok {
+			continue
+		}
+
+		used, avail := value.Value(), total.Value()
+		if rname == v1.ResourceCPU {
+			used, avail = value.MilliValue(), total.MilliValue()
+		}
+
+		pct := math.Max(math.Min(float64(used)/float64(avail)*100, 100), 0)
+		result[rname] = api.Percentage(pct)
+	}
+	return result
+}
+
+func TestNormalizeSimple(t *testing.T) {
+	for _, tt := range []struct {
+		name       string
+		usages     map[string]float64
+		totals     map[string]float64
+		expected   map[string]float64
+		normalizer Normalizer[float64, float64]
+	}{
+		{
+			name:     "single normalization",
+			usages:   map[string]float64{"cpu": 1},
+			totals:   map[string]float64{"cpu": 2},
+			expected: map[string]float64{"cpu": 0.5},
+			normalizer: func(usage, total float64) float64 {
+				return usage / total
+			},
+		},
+		{
+			name: "multiple normalizations",
+			usages: map[string]float64{
+				"cpu": 1,
+				"mem": 6,
+			},
+			totals: map[string]float64{
+				"cpu": 2,
+				"mem": 10,
+			},
+			expected: map[string]float64{
+				"cpu": 0.5,
+				"mem": 0.6,
+			},
+			normalizer: func(usage, total float64) float64 {
+				return usage / total
+			},
+		},
+		{
+			name: "missing totals for a key",
+			usages: map[string]float64{
+				"cpu": 1,
+				"mem": 6,
+			},
+			totals: map[string]float64{
+				"cpu": 2,
+			},
+			expected: map[string]float64{
+				"cpu": 0.5,
+			},
+			normalizer: func(usage, total float64) float64 {
+				return usage / total
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result := Normalize(tt.usages, tt.totals, tt.normalizer)
+			if !reflect.DeepEqual(result, tt.expected) {
+				t.Fatalf("unexpected result: %v", result)
+			}
+		})
+	}
+}
+
+func TestNormalize(t *testing.T) {
+	for _, tt := range []struct {
+		name       string
+		usages     map[string]v1.ResourceList
+		totals     map[string]v1.ResourceList
+		expected   map[string]api.ResourceThresholds
+		normalizer Normalizer[v1.ResourceList, api.ResourceThresholds]
+	}{
+		{
+			name: "single normalization",
+			usages: map[string]v1.ResourceList{
+				"node1": {v1.ResourceCPU: resource.MustParse("1")},
+			},
+			totals: map[string]v1.ResourceList{
+				"node1": {v1.ResourceCPU: resource.MustParse("2")},
+			},
+			expected: map[string]api.ResourceThresholds{
+				"node1": {v1.ResourceCPU: 50},
+			},
+			normalizer: ResourceListUsageNormalizer,
+		},
+		{
+			name: "multiple normalization",
+			usages: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("1"),
+					v1.ResourceMemory: resource.MustParse("6"),
+					v1.ResourcePods:   resource.MustParse("2"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("10"),
+					v1.ResourceMemory: resource.MustParse("20"),
+					v1.ResourcePods:   resource.MustParse("30"),
+				},
+			},
+			totals: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("2"),
+					v1.ResourceMemory: resource.MustParse("6"),
+					v1.ResourcePods:   resource.MustParse("100"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("100"),
+					v1.ResourceMemory: resource.MustParse("100"),
+					v1.ResourcePods:   resource.MustParse("100"),
+				},
+			},
+			expected: map[string]api.ResourceThresholds{
+				"node1": {
+					v1.ResourceCPU:    50,
+					v1.ResourceMemory: 100,
+					v1.ResourcePods:   2,
+				},
+				"node2": {
+					v1.ResourceCPU:    10,
+					v1.ResourceMemory: 20,
+					v1.ResourcePods:   30,
+				},
+			},
+			normalizer: ResourceListUsageNormalizer,
+		},
+		{
+			name: "multiple normalization with over 100% usage",
+			usages: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("120"),
+					v1.ResourceMemory: resource.MustParse("130"),
+					v1.ResourcePods:   resource.MustParse("140"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("150"),
+					v1.ResourceMemory: resource.MustParse("160"),
+					v1.ResourcePods:   resource.MustParse("170"),
+				},
+			},
+			totals: Replicate(
+				[]string{"node1", "node2"},
+				v1.ResourceList{
+					v1.ResourceCPU:    resource.MustParse("100"),
+					v1.ResourceMemory: resource.MustParse("100"),
+					v1.ResourcePods:   resource.MustParse("100"),
+				},
+			),
+			expected: Replicate(
+				[]string{"node1", "node2"},
+				api.ResourceThresholds{
+					v1.ResourceCPU:    100,
+					v1.ResourceMemory: 100,
+					v1.ResourcePods:   100,
+				},
+			),
+			normalizer: ResourceListUsageNormalizer,
+		},
+		{
+			name: "multiple normalization with over 100% usage and different totals",
+			usages: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("2"),
+					v1.ResourceMemory: resource.MustParse("2Gi"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("99"),
+					v1.ResourceMemory: resource.MustParse("99Gi"),
+				},
+				"node3": {
+					v1.ResourceCPU:    resource.MustParse("8"),
+					v1.ResourceMemory: resource.MustParse("8Gi"),
+				},
+			},
+			totals: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("4"),
+					v1.ResourceMemory: resource.MustParse("4Gi"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("100"),
+					v1.ResourceMemory: resource.MustParse("100Gi"),
+				},
+				"node3": {
+					v1.ResourceCPU:    resource.MustParse("4"),
+					v1.ResourceMemory: resource.MustParse("4Gi"),
+				},
+			},
+			expected: map[string]api.ResourceThresholds{
+				"node1": {
+					v1.ResourceCPU:    50,
+					v1.ResourceMemory: 50,
+				},
+				"node2": {
+					v1.ResourceCPU:    99,
+					v1.ResourceMemory: 99,
+				},
+				"node3": {
+					v1.ResourceCPU:    100,
+					v1.ResourceMemory: 100,
+				},
+			},
+			normalizer: ResourceListUsageNormalizer,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result := Normalize(tt.usages, tt.totals, tt.normalizer)
+			if !reflect.DeepEqual(result, tt.expected) {
+				t.Fatalf("unexpected result: %v", result)
+			}
+		})
+	}
+}
+
+func TestAverage(t *testing.T) {
+	for _, tt := range []struct {
+		name     string
+		usage    map[string]v1.ResourceList
+		limits   map[string]v1.ResourceList
+		expected api.ResourceThresholds
+	}{
+		{
+			name:     "empty usage",
+			usage:    map[string]v1.ResourceList{},
+			limits:   map[string]v1.ResourceList{},
+			expected: api.ResourceThresholds{},
+		},
+		{
+			name: "fifty percent usage",
+			usage: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("1"),
+					v1.ResourceMemory: resource.MustParse("6"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("2"),
+					v1.ResourceMemory: resource.MustParse("6"),
+				},
+			},
+			limits: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("2"),
+					v1.ResourceMemory: resource.MustParse("12"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("4"),
+					v1.ResourceMemory: resource.MustParse("12"),
+				},
+			},
+			expected: api.ResourceThresholds{
+				v1.ResourceCPU:    50,
+				v1.ResourceMemory: 50,
+			},
+		},
+		{
+			name: "mixed percent usage",
+			usage: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("10"),
+					v1.ResourceMemory: resource.MustParse("80"),
+					v1.ResourcePods:   resource.MustParse("20"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("20"),
+					v1.ResourceMemory: resource.MustParse("60"),
+					v1.ResourcePods:   resource.MustParse("20"),
+				},
+			},
+			limits: Replicate(
+				[]string{"node1", "node2"},
+				v1.ResourceList{
+					v1.ResourceCPU:    resource.MustParse("100"),
+					v1.ResourceMemory: resource.MustParse("100"),
+					v1.ResourcePods:   resource.MustParse("10000"),
+				},
+			),
+			expected: api.ResourceThresholds{
+				v1.ResourceCPU:    15,
+				v1.ResourceMemory: 70,
+				v1.ResourcePods:   0.2,
+			},
+		},
+		{
+			name: "mixed limits",
+			usage: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("10"),
+					v1.ResourceMemory: resource.MustParse("30"),
+					v1.ResourcePods:   resource.MustParse("200"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("10"),
+					v1.ResourceMemory: resource.MustParse("72"),
+					v1.ResourcePods:   resource.MustParse("200"),
+				},
+			},
+			limits: map[string]v1.ResourceList{
+				"node1": {
+					v1.ResourceCPU:    resource.MustParse("10"),
+					v1.ResourceMemory: resource.MustParse("100"),
+					v1.ResourcePods:   resource.MustParse("1000"),
+				},
+				"node2": {
+					v1.ResourceCPU:    resource.MustParse("1000"),
+					v1.ResourceMemory: resource.MustParse("180"),
+					v1.ResourcePods:   resource.MustParse("10"),
+				},
+			},
+			expected: api.ResourceThresholds{
+				v1.ResourceCPU:    50.5,
+				v1.ResourceMemory: 35,
+				v1.ResourcePods:   60,
+			},
+		},
+		{
+			name: "some nodes missing some resources",
+			usage: map[string]v1.ResourceList{
+				"node1": {
+					"limit-exists-in-all":  resource.MustParse("10"),
+					"limit-exists-in-two":  resource.MustParse("11"),
+					"limit-does-not-exist": resource.MustParse("12"),
+					"usage-exists-in-all":  resource.MustParse("13"),
+					"usage-exists-in-two":  resource.MustParse("20"),
+				},
+				"node2": {
+					"limit-exists-in-all":  resource.MustParse("10"),
+					"limit-exists-in-two":  resource.MustParse("11"),
+					"limit-does-not-exist": resource.MustParse("12"),
+					"usage-exists-in-all":  resource.MustParse("13"),
+					"usage-exists-in-two":  resource.MustParse("20"),
+				},
+				"node3": {
+					"limit-exists-in-all":  resource.MustParse("10"),
+					"limit-exists-in-two":  resource.MustParse("11"),
+					"limit-does-not-exist": resource.MustParse("12"),
+					"usage-exists-in-all":  resource.MustParse("13"),
+				},
+				"node4": {
+					"limit-exists-in-all":  resource.MustParse("10"),
+					"limit-exists-in-two":  resource.MustParse("11"),
+					"limit-does-not-exist": resource.MustParse("12"),
+					"usage-exists-in-all":  resource.MustParse("13"),
+				},
+				"node5": {
+					"random-usage-without-limit": resource.MustParse("10"),
+				},
+			},
+			limits: map[string]v1.ResourceList{
+				"node1": {
+					"limit-exists-in-all":  resource.MustParse("100"),
+					"limit-exists-in-two":  resource.MustParse("100"),
+					"usage-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-two":  resource.MustParse("100"),
+					"usage-does-not-exist": resource.MustParse("100"),
+				},
+				"node2": {
+					"limit-exists-in-all":  resource.MustParse("100"),
+					"limit-exists-in-two":  resource.MustParse("100"),
+					"usage-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-two":  resource.MustParse("100"),
+					"usage-does-not-exist": resource.MustParse("100"),
+				},
+				"node3": {
+					"limit-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-two":  resource.MustParse("100"),
+					"usage-does-not-exist": resource.MustParse("100"),
+				},
+				"node4": {
+					"limit-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-all":  resource.MustParse("100"),
+					"usage-exists-in-two":  resource.MustParse("100"),
+					"usage-does-not-exist": resource.MustParse("100"),
+				},
+				"node5": {
+					"random-limit-without-usage": resource.MustParse("100"),
+				},
+			},
+			expected: api.ResourceThresholds{
+				"limit-exists-in-all": 10,
+				"limit-exists-in-two": 11,
+				"usage-exists-in-all": 13,
+				"usage-exists-in-two": 20,
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			average := Average(
+				Normalize(
+					tt.usage, tt.limits, ResourceListUsageNormalizer,
+				),
+			)
+			if !reflect.DeepEqual(average, tt.expected) {
+				t.Fatalf("unexpected result: %v, expected: %v", average, tt.expected)
+			}
+		})
+	}
+}
+
+func TestSum(t *testing.T) {
+	for _, tt := range []struct {
+		name       string
+		data       api.ResourceThresholds
+		deviations []api.ResourceThresholds
+		expected   []api.ResourceThresholds
+	}{
+		{
+			name: "single deviation",
+			data: api.ResourceThresholds{
+				v1.ResourceCPU:    50,
+				v1.ResourceMemory: 50,
+				v1.ResourcePods:   50,
+			},
+			deviations: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    1,
+					v1.ResourceMemory: 1,
+					v1.ResourcePods:   1,
+				},
+				{
+					v1.ResourceCPU:    2,
+					v1.ResourceMemory: 2,
+					v1.ResourcePods:   2,
+				},
+				{
+					v1.ResourceCPU:    3,
+					v1.ResourceMemory: 3,
+					v1.ResourcePods:   3,
+				},
+			},
+			expected: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    51,
+					v1.ResourceMemory: 51,
+					v1.ResourcePods:   51,
+				},
+				{
+					v1.ResourceCPU:    52,
+					v1.ResourceMemory: 52,
+					v1.ResourcePods:   52,
+				},
+				{
+					v1.ResourceCPU:    53,
+					v1.ResourceMemory: 53,
+					v1.ResourcePods:   53,
+				},
+			},
+		},
+		{
+			name: "deviate with negative values",
+			data: api.ResourceThresholds{
+				v1.ResourceCPU:    50,
+				v1.ResourceMemory: 50,
+				v1.ResourcePods:   50,
+			},
+			deviations: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    -2,
+					v1.ResourceMemory: -2,
+					v1.ResourcePods:   -2,
+				},
+				{
+					v1.ResourceCPU:    -1,
+					v1.ResourceMemory: -1,
+					v1.ResourcePods:   -1,
+				},
+				{
+					v1.ResourceCPU:    0,
+					v1.ResourceMemory: 0,
+					v1.ResourcePods:   0,
+				},
+				{
+					v1.ResourceCPU:    1,
+					v1.ResourceMemory: 1,
+					v1.ResourcePods:   1,
+				},
+				{
+					v1.ResourceCPU:    2,
+					v1.ResourceMemory: 2,
+					v1.ResourcePods:   2,
+				},
+			},
+			expected: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    48,
+					v1.ResourceMemory: 48,
+					v1.ResourcePods:   48,
+				},
+				{
+					v1.ResourceCPU:    49,
+					v1.ResourceMemory: 49,
+					v1.ResourcePods:   49,
+				},
+				{
+					v1.ResourceCPU:    50,
+					v1.ResourceMemory: 50,
+					v1.ResourcePods:   50,
+				},
+				{
+					v1.ResourceCPU:    51,
+					v1.ResourceMemory: 51,
+					v1.ResourcePods:   51,
+				},
+				{
+					v1.ResourceCPU:    52,
+					v1.ResourceMemory: 52,
+					v1.ResourcePods:   52,
+				},
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result := []api.ResourceThresholds{}
+			for _, deviation := range tt.deviations {
+				partial := Sum(tt.data, deviation)
+				result = append(result, partial)
+			}
+
+			if len(result) != len(tt.deviations) {
+				t.Fatalf("unexpected result: %v", result)
+			}
+			if !reflect.DeepEqual(result, tt.expected) {
+				fmt.Printf("%T, %T\n", result, tt.expected)
+				t.Fatalf("unexpected result: %v", result)
+			}
+		})
+	}
+}
+
+func TestClamp(t *testing.T) {
+	for _, tt := range []struct {
+		name     string
+		data     []api.ResourceThresholds
+		minimum  api.Percentage
+		maximum  api.Percentage
+		expected []api.ResourceThresholds
+	}{
+		{
+			name: "all over the limit",
+			data: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    50,
+					v1.ResourceMemory: 50,
+					v1.ResourcePods:   50,
+				},
+			},
+			minimum: 10,
+			maximum: 20,
+			expected: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    20,
+					v1.ResourceMemory: 20,
+					v1.ResourcePods:   20,
+				},
+			},
+		},
+		{
+			name: "some over some below the limits",
+			data: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    7,
+					v1.ResourceMemory: 8,
+					v1.ResourcePods:   88,
+				},
+			},
+			minimum: 10,
+			maximum: 20,
+			expected: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    10,
+					v1.ResourceMemory: 10,
+					v1.ResourcePods:   20,
+				},
+			},
+		},
+		{
+			name: "all within the limits",
+			data: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    15,
+					v1.ResourceMemory: 15,
+					v1.ResourcePods:   15,
+				},
+			},
+			minimum: 10,
+			maximum: 20,
+			expected: []api.ResourceThresholds{
+				{
+					v1.ResourceCPU:    15,
+					v1.ResourceMemory: 15,
+					v1.ResourcePods:   15,
+				},
+			},
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			fn := func(thresholds api.ResourceThresholds) api.ResourceThresholds {
+				return Clamp(thresholds, tt.minimum, tt.maximum)
+			}
+			result := Map(tt.data, fn)
+			if !reflect.DeepEqual(result, tt.expected) {
+				t.Fatalf("unexpected result: %v", result)
+			}
+		})
+	}
+}
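Note: one subtlety exercised by the "some nodes missing some resources" case above: Average divides each resource by the number of nodes that actually report it, not by the total node count. A small illustrative sketch of that semantics (assuming the normalizer package from this change):

package main

import (
	"fmt"

	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
)

func main() {
	// "foo" is averaged over the two nodes reporting it; "bar" is
	// averaged over the single node that reports it.
	usage := map[string]map[string]float64{
		"node1": {"foo": 10, "bar": 30},
		"node2": {"foo": 20},
	}
	fmt.Println(normalizer.Average(usage)) // map[bar:30 foo:15]
}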