From d5ee8552215463efef0297849631caf8b1419a76 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Thu, 9 Jun 2022 11:50:09 +0200 Subject: [PATCH 1/3] Pass the strategy name into evictor through context --- pkg/descheduler/descheduler.go | 2 +- pkg/descheduler/evictions/evictions.go | 15 ++++++++++----- pkg/descheduler/strategies/duplicates.go | 2 +- pkg/descheduler/strategies/failedpods.go | 2 +- pkg/descheduler/strategies/node_affinity.go | 2 +- pkg/descheduler/strategies/node_taint.go | 2 +- .../strategies/nodeutilization/nodeutilization.go | 2 +- pkg/descheduler/strategies/pod_antiaffinity.go | 2 +- pkg/descheduler/strategies/pod_lifetime.go | 2 +- pkg/descheduler/strategies/toomanyrestarts.go | 2 +- .../strategies/topologyspreadconstraint.go | 2 +- 11 files changed, 20 insertions(+), 15 deletions(-) diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index c03f10a29..1cf8338b9 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -302,7 +302,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer continue } evictorFilter := evictions.NewEvictorFilter(nodes, getPodsAssignedToNode, evictLocalStoragePods, evictSystemCriticalPods, ignorePvcPods, evictBarePods, evictions.WithNodeFit(nodeFit), evictions.WithPriorityThreshold(thresholdPriority)) - f(ctx, rs.Client, strategy, nodes, podEvictor, evictorFilter, getPodsAssignedToNode) + f(context.WithValue(ctx, "strategyName", string(name)), rs.Client, strategy, nodes, podEvictor, evictorFilter, getPodsAssignedToNode) } } else { klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 39f5ef6e8..2050db171 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -19,7 +19,6 @@ package evictions import ( "context" "fmt" - "strings" v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1beta1" @@ -106,11 +105,17 @@ func (pe *PodEvictor) TotalEvicted() uint { // EvictPod returns non-nil error only when evicting a pod on a node is not // possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod // is evicted on the server side. -func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, error) { - reason := strategy - if len(reasons) > 0 { - reason += " (" + strings.Join(reasons, ", ") + ")" +// eviction reason can be set through the ctx's evictionReason:STRING pair +func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (bool, error) { + strategy := "" + if ctx.Value("strategyName") != nil { + strategy = ctx.Value("strategyName").(string) } + reason := "" + if ctx.Value("evictionReason") != nil { + reason = ctx.Value("evictionReason").(string) + } + if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[node]+1 > *pe.maxPodsToEvictPerNode { if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc() diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index d5e11f0f5..98fdbb0c5 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -199,7 +199,7 @@ func RemoveDuplicatePods( // It's assumed all duplicated pods are in the same priority class // TODO(jchaloup): check if the pod has a different node to lend to for _, pod := range pods[upperAvg-1:] { - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName]); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index b9bfabaea..fd4106c78 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -75,7 +75,7 @@ func RemoveFailedPods( continue } - if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil { + if _, err = podEvictor.EvictPod(ctx, pods[i], node); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index c3c21fec9..29d3ec0ab 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -93,7 +93,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter for _, pod := range pods { if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod)) - if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { klog.ErrorS(err, "Error evicting pod") break } diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index 094ded915..1ff231f3f 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -106,7 +106,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa taintFilterFnc, ) { klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node)) - if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil { + if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { klog.ErrorS(err, "Error evicting pod") break } diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go index 9c155851d..10944fdd9 100644 --- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go @@ -313,7 +313,7 @@ func evictPods( continue } - success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node, strategy) + success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node) if err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index ded994f97..0ca9522d5 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -86,7 +86,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients totalPods := len(pods) for i := 0; i < totalPods; i++ { if checkPodsWithAntiAffinityExist(pods[i], pods) && evictorFilter.Filter(pods[i]) { - success, err := podEvictor.EvictPod(ctx, pods[i], node, "InterPodAntiAffinity") + success, err := podEvictor.EvictPod(ctx, pods[i], node) if err != nil { klog.ErrorS(err, "Error evicting pod") break diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index 93cd28b93..d5bda4eee 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -107,7 +107,7 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D podutil.SortPodsBasedOnAge(podsToEvict) for _, pod := range podsToEvict { - success, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName], "PodLifeTime") + success, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName]) if success { klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) } diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index 7e5431e01..cfa8f7751 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -89,7 +89,7 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter } else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold { continue } - if _, err := podEvictor.EvictPod(ctx, pods[i], node, "TooManyRestarts"); err != nil { + if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index 531bc4560..8473c8fcd 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -187,7 +187,7 @@ func RemovePodsViolatingTopologySpreadConstraint( if !isEvictable(pod) { continue } - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName], "PodTopologySpread"); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName]); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } From cc49f9fcc23297bd467c7b0d6237d9db0a5222f4 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Thu, 9 Jun 2022 12:02:49 +0200 Subject: [PATCH 2/3] Drop node parameter from EvictPod The method uses the node object to only get the node name. The node name can be retrieved from the pod object. Some strategies might try to evict a pod in Pending state which does not have the .spec.nodeName field set. Thus, skipping the test for the node limit. --- pkg/descheduler/evictions/evictions.go | 32 +++++++++++-------- pkg/descheduler/strategies/duplicates.go | 2 +- pkg/descheduler/strategies/failedpods.go | 2 +- pkg/descheduler/strategies/node_affinity.go | 2 +- pkg/descheduler/strategies/node_taint.go | 2 +- .../nodeutilization/nodeutilization.go | 2 +- .../strategies/pod_antiaffinity.go | 2 +- pkg/descheduler/strategies/pod_lifetime.go | 2 +- pkg/descheduler/strategies/toomanyrestarts.go | 2 +- .../strategies/topologyspreadconstraint.go | 2 +- 10 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 2050db171..dda57ff76 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -44,7 +44,7 @@ const ( ) // nodePodEvictedCount keeps count of pods evicted on node -type nodePodEvictedCount map[*v1.Node]uint +type nodePodEvictedCount map[string]uint type namespacePodEvictCount map[string]uint type PodEvictor struct { @@ -72,7 +72,7 @@ func NewPodEvictor( var namespacePodCount = make(namespacePodEvictCount) for _, node := range nodes { // Initialize podsEvicted till now with 0. - nodePodCount[node] = 0 + nodePodCount[node.Name] = 0 } return &PodEvictor{ @@ -90,7 +90,7 @@ func NewPodEvictor( // NodeEvicted gives a number of pods evicted for node func (pe *PodEvictor) NodeEvicted(node *v1.Node) uint { - return pe.nodepodCount[node] + return pe.nodepodCount[node.Name] } // TotalEvicted gives a number of pods evicted through all nodes @@ -106,7 +106,7 @@ func (pe *PodEvictor) TotalEvicted() uint { // possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod // is evicted on the server side. // eviction reason can be set through the ctx's evictionReason:STRING pair -func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (bool, error) { +func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { strategy := "" if ctx.Value("strategyName") != nil { strategy = ctx.Value("strategyName").(string) @@ -116,16 +116,18 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) reason = ctx.Value("evictionReason").(string) } - if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[node]+1 > *pe.maxPodsToEvictPerNode { - if pe.metricsEnabled { - metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc() + if pod.Spec.NodeName != "" { + if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[pod.Spec.NodeName]+1 > *pe.maxPodsToEvictPerNode { + if pe.metricsEnabled { + metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() + } + return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, pod.Spec.NodeName) } - return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name) } if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace { if pe.metricsEnabled { - metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc() + metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace) } @@ -135,22 +137,24 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) // err is used only for logging purposes klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason) if pe.metricsEnabled { - metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc() + metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } return false, nil } - pe.nodepodCount[node]++ + if pod.Spec.NodeName != "" { + pe.nodepodCount[pod.Spec.NodeName]++ + } pe.namespacePodCount[pod.Namespace]++ if pe.metricsEnabled { - metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc() + metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } if pe.dryRun { - klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", node.Name) + klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", pod.Spec.NodeName) } else { - klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", node.Name) + klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", pod.Spec.NodeName) eventBroadcaster := record.NewBroadcaster() eventBroadcaster.StartStructuredLogging(3) eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)}) diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index 98fdbb0c5..a38d53d87 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -199,7 +199,7 @@ func RemoveDuplicatePods( // It's assumed all duplicated pods are in the same priority class // TODO(jchaloup): check if the pod has a different node to lend to for _, pod := range pods[upperAvg-1:] { - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName]); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index fd4106c78..ab8b10ff5 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -75,7 +75,7 @@ func RemoveFailedPods( continue } - if _, err = podEvictor.EvictPod(ctx, pods[i], node); err != nil { + if _, err = podEvictor.EvictPod(ctx, pods[i]); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index 29d3ec0ab..9fb8fd90b 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -93,7 +93,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter for _, pod := range pods { if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod)) - if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod); err != nil { klog.ErrorS(err, "Error evicting pod") break } diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index 1ff231f3f..c5e4cecb9 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -106,7 +106,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa taintFilterFnc, ) { klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node)) - if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { + if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil { klog.ErrorS(err, "Error evicting pod") break } diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go index 10944fdd9..9e93819b4 100644 --- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go @@ -313,7 +313,7 @@ func evictPods( continue } - success, err := podEvictor.EvictPod(ctx, pod, nodeInfo.node) + success, err := podEvictor.EvictPod(ctx, pod) if err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index 0ca9522d5..56f70e3c7 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -86,7 +86,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients totalPods := len(pods) for i := 0; i < totalPods; i++ { if checkPodsWithAntiAffinityExist(pods[i], pods) && evictorFilter.Filter(pods[i]) { - success, err := podEvictor.EvictPod(ctx, pods[i], node) + success, err := podEvictor.EvictPod(ctx, pods[i]) if err != nil { klog.ErrorS(err, "Error evicting pod") break diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index d5bda4eee..84f406747 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -107,7 +107,7 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D podutil.SortPodsBasedOnAge(podsToEvict) for _, pod := range podsToEvict { - success, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName]) + success, err := podEvictor.EvictPod(ctx, pod) if success { klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) } diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index cfa8f7751..22887081b 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -89,7 +89,7 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter } else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold { continue } - if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { + if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index 8473c8fcd..65a30ef8d 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -187,7 +187,7 @@ func RemovePodsViolatingTopologySpreadConstraint( if !isEvictable(pod) { continue } - if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName]); err != nil { + if _, err := podEvictor.EvictPod(ctx, pod); err != nil { klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) break } From c838614b6ce30604a8b4b0b0cb34a3a0555e6572 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Thu, 9 Jun 2022 12:48:47 +0200 Subject: [PATCH 3/3] EvictPod: stop returning an error When an error is returned a strategy either stops completely or starts processing another node. Given the error can be a transient error or only one of the limits can get exceeded it is fair to just skip a pod that failed eviction and proceed to the next instead. In order to optimize the processing and stop earlier, it is more practical to implement a check which will say when a limit was exceeded. --- pkg/descheduler/evictions/evictions.go | 27 ++++++++++++------- pkg/descheduler/strategies/duplicates.go | 7 ++--- pkg/descheduler/strategies/failedpods.go | 6 ++--- pkg/descheduler/strategies/node_affinity.go | 7 ++--- pkg/descheduler/strategies/node_taint.go | 7 ++--- .../nodeutilization/nodeutilization.go | 13 ++++----- .../strategies/pod_antiaffinity.go | 12 ++++----- pkg/descheduler/strategies/pod_lifetime.go | 13 ++++----- pkg/descheduler/strategies/toomanyrestarts.go | 7 ++--- .../strategies/topologyspreadconstraint.go | 10 ++++--- 10 files changed, 61 insertions(+), 48 deletions(-) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index dda57ff76..a83dc704d 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -102,11 +102,18 @@ func (pe *PodEvictor) TotalEvicted() uint { return total } -// EvictPod returns non-nil error only when evicting a pod on a node is not -// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod -// is evicted on the server side. -// eviction reason can be set through the ctx's evictionReason:STRING pair -func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { +// NodeLimitExceeded checks if the number of evictions for a node was exceeded +func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool { + if pe.maxPodsToEvictPerNode != nil { + return pe.nodepodCount[node.Name] == *pe.maxPodsToEvictPerNode + } + return false +} + +// EvictPod evicts a pod while exercising eviction limits. +// Returns true when the pod is evicted on the server side. +// Eviction reason can be set through the ctx's evictionReason:STRING pair +func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) bool { strategy := "" if ctx.Value("strategyName") != nil { strategy = ctx.Value("strategyName").(string) @@ -121,7 +128,8 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } - return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, pod.Spec.NodeName) + klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per node reached"), "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName) + return false } } @@ -129,7 +137,8 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } - return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace) + klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per namespace reached"), "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace) + return false } err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion) @@ -139,7 +148,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc() } - return false, nil + return false } if pod.Spec.NodeName != "" { @@ -161,7 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) { r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"}) r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason)) } - return true, nil + return true } func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error { diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index a38d53d87..f4dc13bf6 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -192,6 +192,7 @@ func RemoveDuplicatePods( } upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes)))) + loop: for nodeName, pods := range podNodes { klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg) // list of duplicated pods does not contain the original referential pod @@ -199,9 +200,9 @@ func RemoveDuplicatePods( // It's assumed all duplicated pods are in the same priority class // TODO(jchaloup): check if the pod has a different node to lend to for _, pod := range pods[upperAvg-1:] { - if _, err := podEvictor.EvictPod(ctx, pod); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break + podEvictor.EvictPod(ctx, pod) + if podEvictor.NodeLimitExceeded(nodeMap[nodeName]) { + continue loop } } } diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index ab8b10ff5..0dac0c0c0 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -75,9 +75,9 @@ func RemoveFailedPods( continue } - if _, err = podEvictor.EvictPod(ctx, pods[i]); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break + podEvictor.EvictPod(ctx, pods[i]) + if podEvictor.NodeLimitExceeded(node) { + continue } } } diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index 9fb8fd90b..39145b095 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -74,6 +74,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter switch nodeAffinity { case "requiredDuringSchedulingIgnoredDuringExecution": + loop: for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) @@ -93,9 +94,9 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter for _, pod := range pods { if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod)) - if _, err := podEvictor.EvictPod(ctx, pod); err != nil { - klog.ErrorS(err, "Error evicting pod") - break + podEvictor.EvictPod(ctx, pod) + if podEvictor.NodeLimitExceeded(node) { + continue loop } } } diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index c5e4cecb9..48cdcf48c 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -91,6 +91,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa } } +loop: for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) @@ -106,9 +107,9 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa taintFilterFnc, ) { klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node)) - if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil { - klog.ErrorS(err, "Error evicting pod") - break + podEvictor.EvictPod(ctx, pods[i]) + if podEvictor.NodeLimitExceeded(node) { + continue loop } } } diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go index 9e93819b4..5790141f9 100644 --- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go @@ -313,14 +313,8 @@ func evictPods( continue } - success, err := podEvictor.EvictPod(ctx, pod) - if err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } - - if success { - klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod), "err", err) + if podEvictor.EvictPod(ctx, pod) { + klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod)) for name := range totalAvailableUsage { if name == v1.ResourcePods { @@ -351,6 +345,9 @@ func evictPods( break } } + if podEvictor.NodeLimitExceeded(nodeInfo.node) { + return + } } } } diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index 56f70e3c7..b13418c13 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -75,6 +75,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients return } +loop: for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) @@ -86,13 +87,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients totalPods := len(pods) for i := 0; i < totalPods; i++ { if checkPodsWithAntiAffinityExist(pods[i], pods) && evictorFilter.Filter(pods[i]) { - success, err := podEvictor.EvictPod(ctx, pods[i]) - if err != nil { - klog.ErrorS(err, "Error evicting pod") - break - } - - if success { + if podEvictor.EvictPod(ctx, pods[i]) { // Since the current pod is evicted all other pods which have anti-affinity with this // pod need not be evicted. // Update pods. @@ -101,6 +96,9 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients totalPods-- } } + if podEvictor.NodeLimitExceeded(node) { + continue loop + } } } } diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index 84f406747..33ad25bfd 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -106,15 +106,16 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D // in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction podutil.SortPodsBasedOnAge(podsToEvict) + nodeLimitExceeded := map[string]bool{} for _, pod := range podsToEvict { - success, err := podEvictor.EvictPod(ctx, pod) - if success { + if nodeLimitExceeded[pod.Spec.NodeName] { + continue + } + if podEvictor.EvictPod(ctx, pod) { klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) } - - if err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break + if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) { + nodeLimitExceeded[pod.Spec.NodeName] = true } } } diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index 22887081b..914046623 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -72,6 +72,7 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter return } +loop: for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter) @@ -89,9 +90,9 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter } else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold { continue } - if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break + podEvictor.EvictPod(ctx, pods[i]) + if podEvictor.NodeLimitExceeded(node) { + continue loop } } } diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index 65a30ef8d..87d11c4ab 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -183,13 +183,17 @@ func RemovePodsViolatingTopologySpreadConstraint( } } + nodeLimitExceeded := map[string]bool{} for pod := range podsForEviction { + if nodeLimitExceeded[pod.Spec.NodeName] { + continue + } if !isEvictable(pod) { continue } - if _, err := podEvictor.EvictPod(ctx, pod); err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break + podEvictor.EvictPod(ctx, pod) + if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) { + nodeLimitExceeded[pod.Spec.NodeName] = true } } }