diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index dda57ff76..a83dc704d 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -102,11 +102,18 @@ func (pe *PodEvictor) TotalEvicted() uint {
 	return total
 }
 
-// EvictPod returns non-nil error only when evicting a pod on a node is not
-// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
-// is evicted on the server side.
-// eviction reason can be set through the ctx's evictionReason:STRING pair
-func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
+// NodeLimitExceeded checks if the number of evictions for a node was exceeded
+func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
+	if pe.maxPodsToEvictPerNode != nil {
+		return pe.nodepodCount[node.Name] == *pe.maxPodsToEvictPerNode
+	}
+	return false
+}
+
+// EvictPod evicts a pod while exercising eviction limits.
+// Returns true when the pod is evicted on the server side.
+// Eviction reason can be set through the ctx's evictionReason:STRING pair
+func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) bool {
 	strategy := ""
 	if ctx.Value("strategyName") != nil {
 		strategy = ctx.Value("strategyName").(string)
@@ -121,7 +128,8 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
 			if pe.metricsEnabled {
 				metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
 			}
-			return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, pod.Spec.NodeName)
+			klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per node reached"), "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
+			return false
 		}
 	}
 
@@ -129,7 +137,8 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
 		if pe.metricsEnabled {
 			metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
 		}
-		return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
+		klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per namespace reached"), "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
+		return false
 	}
 
 	err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
@@ -139,7 +148,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
 		if pe.metricsEnabled {
 			metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
 		}
-		return false, nil
+		return false
 	}
 
 	if pod.Spec.NodeName != "" {
@@ -161,7 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) (bool, error) {
 		r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
 		r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
 	}
-	return true, nil
+	return true
 }
 
 func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go
index a38d53d87..f4dc13bf6 100644
--- a/pkg/descheduler/strategies/duplicates.go
+++ b/pkg/descheduler/strategies/duplicates.go
@@ -192,6 +192,7 @@ func RemoveDuplicatePods(
 		}
 
 		upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
+	loop:
 		for nodeName, pods := range podNodes {
 			klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
 			// list of duplicated pods does not contain the original referential pod
@@ -199,9 +200,9 @@ func RemoveDuplicatePods(
 			// It's assumed all duplicated pods are in the same priority class
 			// TODO(jchaloup): check if the pod has a different node to lend to
 			for _, pod := range pods[upperAvg-1:] {
-				if _, err := podEvictor.EvictPod(ctx, pod); err != nil {
-					klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-					break
+				podEvictor.EvictPod(ctx, pod)
+				if podEvictor.NodeLimitExceeded(nodeMap[nodeName]) {
+					continue loop
 				}
 			}
 		}
diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go
index ab8b10ff5..0dac0c0c0 100644
--- a/pkg/descheduler/strategies/failedpods.go
+++ b/pkg/descheduler/strategies/failedpods.go
@@ -75,9 +75,9 @@ func RemoveFailedPods(
 				continue
 			}
 
-			if _, err = podEvictor.EvictPod(ctx, pods[i]); err != nil {
-				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-				break
+			podEvictor.EvictPod(ctx, pods[i])
+			if podEvictor.NodeLimitExceeded(node) {
+				continue
 			}
 		}
 	}
diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go
index 9fb8fd90b..39145b095 100644
--- a/pkg/descheduler/strategies/node_affinity.go
+++ b/pkg/descheduler/strategies/node_affinity.go
@@ -74,6 +74,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
 
 	switch nodeAffinity {
 	case "requiredDuringSchedulingIgnoredDuringExecution":
+	loop:
 		for _, node := range nodes {
 			klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
 
@@ -93,9 +94,9 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
 			for _, pod := range pods {
 				if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
 					klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
-					if _, err := podEvictor.EvictPod(ctx, pod); err != nil {
-						klog.ErrorS(err, "Error evicting pod")
-						break
+					podEvictor.EvictPod(ctx, pod)
+					if podEvictor.NodeLimitExceeded(node) {
+						continue loop
 					}
 				}
 			}
diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go
index c5e4cecb9..48cdcf48c 100644
--- a/pkg/descheduler/strategies/node_taint.go
+++ b/pkg/descheduler/strategies/node_taint.go
@@ -91,6 +91,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
 		}
 	}
 
+loop:
 	for _, node := range nodes {
 		klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
 		pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
@@ -106,9 +107,9 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
 				taintFilterFnc,
 			) {
 				klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
-				if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil {
-					klog.ErrorS(err, "Error evicting pod")
-					break
+				podEvictor.EvictPod(ctx, pods[i])
+				if podEvictor.NodeLimitExceeded(node) {
+					continue loop
 				}
 			}
 		}
diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
index 9e93819b4..5790141f9 100644
--- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
@@ -313,14 +313,8 @@ func evictPods(
 				continue
 			}
 
-			success, err := podEvictor.EvictPod(ctx, pod)
-			if err != nil {
-				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-				break
-			}
-
-			if success {
-				klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod), "err", err)
+			if podEvictor.EvictPod(ctx, pod) {
+				klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
 
 				for name := range totalAvailableUsage {
 					if name == v1.ResourcePods {
@@ -351,6 +345,9 @@ func evictPods(
 					break
 				}
 			}
+			if podEvictor.NodeLimitExceeded(nodeInfo.node) {
+				return
+			}
 		}
 	}
 }
diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go
index 56f70e3c7..b13418c13 100644
--- a/pkg/descheduler/strategies/pod_antiaffinity.go
+++ b/pkg/descheduler/strategies/pod_antiaffinity.go
@@ -75,6 +75,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
 		return
 	}
 
+loop:
 	for _, node := range nodes {
 		klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
 		pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
@@ -86,13 +87,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
 		totalPods := len(pods)
 		for i := 0; i < totalPods; i++ {
 			if checkPodsWithAntiAffinityExist(pods[i], pods) && evictorFilter.Filter(pods[i]) {
-				success, err := podEvictor.EvictPod(ctx, pods[i])
-				if err != nil {
-					klog.ErrorS(err, "Error evicting pod")
-					break
-				}
-
-				if success {
+				if podEvictor.EvictPod(ctx, pods[i]) {
 					// Since the current pod is evicted all other pods which have anti-affinity with this
 					// pod need not be evicted.
 					// Update pods.
@@ -101,6 +96,9 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
 					totalPods--
 				}
 			}
+			if podEvictor.NodeLimitExceeded(node) {
+				continue loop
+			}
 		}
 	}
 }
diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go
index 84f406747..33ad25bfd 100644
--- a/pkg/descheduler/strategies/pod_lifetime.go
+++ b/pkg/descheduler/strategies/pod_lifetime.go
@@ -106,15 +106,16 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
 	// in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction
 	podutil.SortPodsBasedOnAge(podsToEvict)
 
+	nodeLimitExceeded := map[string]bool{}
 	for _, pod := range podsToEvict {
-		success, err := podEvictor.EvictPod(ctx, pod)
-		if success {
+		if nodeLimitExceeded[pod.Spec.NodeName] {
+			continue
+		}
+		if podEvictor.EvictPod(ctx, pod) {
 			klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds)
 		}
-
-		if err != nil {
-			klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-			break
+		if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
+			nodeLimitExceeded[pod.Spec.NodeName] = true
 		}
 	}
diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go
index 22887081b..914046623 100644
--- a/pkg/descheduler/strategies/toomanyrestarts.go
+++ b/pkg/descheduler/strategies/toomanyrestarts.go
@@ -72,6 +72,7 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter
 		return
 	}
 
+loop:
 	for _, node := range nodes {
 		klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
 		pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
@@ -89,9 +90,9 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter
 			} else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
 				continue
 			}
-			if _, err := podEvictor.EvictPod(ctx, pods[i]); err != nil {
-				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-				break
+			podEvictor.EvictPod(ctx, pods[i])
+			if podEvictor.NodeLimitExceeded(node) {
+				continue loop
 			}
 		}
 	}
diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go
index 65a30ef8d..87d11c4ab 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint.go
@@ -183,13 +183,17 @@ func RemovePodsViolatingTopologySpreadConstraint(
 		}
 	}
 
+	nodeLimitExceeded := map[string]bool{}
 	for pod := range podsForEviction {
+		if nodeLimitExceeded[pod.Spec.NodeName] {
+			continue
+		}
 		if !isEvictable(pod) {
 			continue
 		}
-		if _, err := podEvictor.EvictPod(ctx, pod); err != nil {
-			klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
-			break
+		podEvictor.EvictPod(ctx, pod)
+		if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
+			nodeLimitExceeded[pod.Spec.NodeName] = true
 		}
 	}
 }
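
Note: the per-node strategies in this patch all converge on the same caller pattern. The sketch below is not part of the patch; `evictor`, `evictOnNodes`, and `podsOnNode` are hypothetical stand-ins for `*evictions.PodEvictor` and the `podutil` listing helpers, included only to make the intended control flow explicit: evict each candidate, and once a node's eviction budget is exhausted, skip the rest of that node's pods with a labeled continue instead of aborting the whole strategy.

package example

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// evictor is a hypothetical stand-in for *evictions.PodEvictor after this change:
// EvictPod reports server-side success as a bool, and NodeLimitExceeded reports
// whether the per-node eviction budget for the given node has been used up.
type evictor interface {
	EvictPod(ctx context.Context, pod *v1.Pod) bool
	NodeLimitExceeded(node *v1.Node) bool
}

// evictOnNodes illustrates the caller pattern used by the strategies above.
func evictOnNodes(ctx context.Context, pe evictor, nodes []*v1.Node, podsOnNode func(*v1.Node) []*v1.Pod) {
loop:
	for _, node := range nodes {
		for _, pod := range podsOnNode(node) {
			if pe.EvictPod(ctx, pod) {
				fmt.Printf("evicted %s/%s\n", pod.Namespace, pod.Name)
			}
			// Once this node's limit is reached, move on to the next node.
			if pe.NodeLimitExceeded(node) {
				continue loop
			}
		}
	}
}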