From c239e1199f3a20a99931aecb5e580d687af2a909 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Tue, 6 Apr 2021 09:42:50 +0200 Subject: [PATCH] LNU: improve nodeUsage logging To avoid: ``` I0210 11:56:04.137956 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]} I0210 11:56:04.138829 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]} I0210 11:56:04.139044 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]} ``` --- README.md | 2 +- .../strategies/lownodeutilization.go | 54 +++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index f08837a78..2cc1644b4 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ The under utilization of nodes is determined by a configurable threshold `thresh calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity). For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node). -If a node's usage is below threshold for all (cpu, memory, and number of pods), the node is considered underutilized. +If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized. Currently, pods request resource requirements are considered for computing node resource utilization. There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go index 27614f7db..f8b751a99 100644 --- a/pkg/descheduler/strategies/lownodeutilization.go +++ b/pkg/descheduler/strategies/lownodeutilization.go @@ -116,9 +116,9 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg // log message in one line keysAndValues := []interface{}{ - "CPU", int64(float64(thresholds[v1.ResourceCPU])), - "Mem", int64(float64(thresholds[v1.ResourceMemory])), - "Pods", int64(float64(thresholds[v1.ResourcePods])), + "CPU", thresholds[v1.ResourceCPU], + "Mem", thresholds[v1.ResourceMemory], + "Pods", thresholds[v1.ResourcePods], } for name := range thresholds { if !isBasicResource(name) { @@ -126,12 +126,26 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg } } klog.V(1).InfoS("Criteria for a node under utilization", keysAndValues...) + klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes)) + + // log message in one line + keysAndValues = []interface{}{ + "CPU", targetThresholds[v1.ResourceCPU], + "Mem", targetThresholds[v1.ResourceMemory], + "Pods", targetThresholds[v1.ResourcePods], + } + for name := range targetThresholds { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), int64(float64(targetThresholds[name]))) + } + } + klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...) + klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes)) if len(lowNodes) == 0 { klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further") return } - klog.V(1).InfoS("Total number of underutilized nodes", "totalNumber", len(lowNodes)) if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes { klog.V(1).InfoS("Number of nodes underutilized is less than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes) @@ -148,20 +162,6 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg return } - // log message in one line - keysAndValues = []interface{}{ - "CPU", int64(float64(targetThresholds[v1.ResourceCPU])), - "Mem", int64(float64(targetThresholds[v1.ResourceMemory])), - "Pods", int64(float64(targetThresholds[v1.ResourcePods])), - } - for name := range targetThresholds { - if !isBasicResource(name) { - keysAndValues = append(keysAndValues, string(name), int64(float64(targetThresholds[name]))) - } - } - klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...) - - klog.V(1).InfoS("Number of nodes above target utilization", "totalNumber", len(targetNodes)) evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority)) evictPodsFromTargetNodes( @@ -323,7 +323,7 @@ func evictPodsFromTargetNodes( sortNodesByUsage(targetNodes) - // upper bound on total number of pods/cpu/memory and optional extened resources to be moved + // upper bound on total number of pods/cpu/memory and optional extended resources to be moved totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{ v1.ResourcePods: {}, v1.ResourceCPU: {}, @@ -334,7 +334,7 @@ func evictPodsFromTargetNodes( for _, node := range lowNodes { taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints - for name := range node.highResourceThreshold { + for _, name := range resourceNames { if _, ok := totalAvailableUsage[name]; !ok { totalAvailableUsage[name] = resource.NewQuantity(0, resource.DecimalSI) } @@ -424,7 +424,19 @@ func evictPods( } } - klog.V(3).InfoS("Updated node usage", "updatedUsage", nodeUsage) + keysAndValues := []interface{}{ + "node", nodeUsage.node.Name, + "CPU", nodeUsage.usage[v1.ResourceCPU].MilliValue(), + "Mem", nodeUsage.usage[v1.ResourceMemory].Value(), + "Pods", nodeUsage.usage[v1.ResourcePods].Value(), + } + for name := range totalAvailableUsage { + if !isBasicResource(name) { + keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) + } + } + + klog.V(3).InfoS("Updated node usage", keysAndValues...) // check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved if !continueCond() { break