From d5e66ab62e3683dfce663417ba46f957b5b29b59 Mon Sep 17 00:00:00 2001
From: Mike Dame
Date: Mon, 11 Jul 2022 15:01:34 +0000
Subject: [PATCH] Add EvictOptions struct to EvictPod()

---
 pkg/descheduler/descheduler.go                | 15 ++++++++++++-
 pkg/descheduler/evictions/evictions.go        | 22 ++++++++++---------
 pkg/descheduler/strategies/duplicates.go      |  2 +-
 pkg/descheduler/strategies/failedpods.go      |  2 +-
 pkg/descheduler/strategies/node_affinity.go   |  3 +--
 pkg/descheduler/strategies/node_taint.go      |  2 +-
 .../nodeutilization/nodeutilization.go        |  2 +-
 .../strategies/pod_antiaffinity.go            |  2 +-
 pkg/descheduler/strategies/pod_lifetime.go    |  2 +-
 pkg/descheduler/strategies/toomanyrestarts.go |  2 +-
 .../strategies/topologyspreadconstraint.go    |  4 ++--
 11 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go
index 1cf8338b9..15d138ff0 100644
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -301,7 +301,20 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 				klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
 				continue
 			}
-			evictorFilter := evictions.NewEvictorFilter(nodes, getPodsAssignedToNode, evictLocalStoragePods, evictSystemCriticalPods, ignorePvcPods, evictBarePods, evictions.WithNodeFit(nodeFit), evictions.WithPriorityThreshold(thresholdPriority))
+			evictorFilter := evictions.NewEvictorFilter(
+				nodes,
+				getPodsAssignedToNode,
+				evictLocalStoragePods,
+				evictSystemCriticalPods,
+				ignorePvcPods,
+				evictBarePods,
+				evictions.WithNodeFit(nodeFit),
+				evictions.WithPriorityThreshold(thresholdPriority),
+			)
+			// TODO: strategyName should be accessible from within the strategy using a framework
+			// handle or function which the Evictor has access to. For migration/in-progress framework
+			// work, we are currently passing this via context. To be removed
+			// (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292)
 			f(context.WithValue(ctx, "strategyName", string(name)), rs.Client, strategy, nodes, podEvictor, evictorFilter, getPodsAssignedToNode)
 		}
 	} else {
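A minimal, self-contained sketch of the context round-trip the hunk above sets up (and which the TODO plans to replace with a framework handle): RunDeschedulerStrategies attaches the strategy name with context.WithValue, and EvictPod in evictions.go below reads it back with a nil check plus a string type assertion. The strategy name and the small main function here are illustrative only, not code from this patch.

package main

import (
	"context"
	"fmt"
)

// readStrategyName mirrors the lookup EvictPod performs: a nil check on the
// context value followed by a string type assertion.
func readStrategyName(ctx context.Context) string {
	if v := ctx.Value("strategyName"); v != nil {
		return v.(string)
	}
	return ""
}

func main() {
	// RunDeschedulerStrategies attaches the strategy name before invoking the strategy function.
	ctx := context.WithValue(context.Background(), "strategyName", "RemoveFailedPods")
	fmt.Println(readStrategyName(ctx)) // prints: RemoveFailedPods
}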
diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index a83dc704d..74fb02a38 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -110,18 +110,20 @@ func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
 	return false
 }

+// EvictOptions provides a handle for passing additional info to EvictPod
+type EvictOptions struct {
+	// Reason allows for passing details about the specific eviction for logging.
+	Reason string
+}
+
 // EvictPod evicts a pod while exercising eviction limits.
 // Returns true when the pod is evicted on the server side.
-// Eviction reason can be set through the ctx's evictionReason:STRING pair
-func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) bool {
+func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) bool {
+	// TODO: Replace context-propagated Strategy name with a defined framework handle for accessing Strategy info
 	strategy := ""
 	if ctx.Value("strategyName") != nil {
 		strategy = ctx.Value("strategyName").(string)
 	}
-	reason := ""
-	if ctx.Value("evictionReason") != nil {
-		reason = ctx.Value("evictionReason").(string)
-	}

 	if pod.Spec.NodeName != "" {
 		if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[pod.Spec.NodeName]+1 > *pe.maxPodsToEvictPerNode {
@@ -144,7 +146,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) bool {
 	err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
 	if err != nil {
 		// err is used only for logging purposes
-		klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
+		klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", opts.Reason)
 		if pe.metricsEnabled {
 			metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
 		}
@@ -161,14 +163,14 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod) bool {
 	}

 	if pe.dryRun {
-		klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", pod.Spec.NodeName)
+		klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
 	} else {
-		klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", pod.Spec.NodeName)
+		klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
 		eventBroadcaster := record.NewBroadcaster()
 		eventBroadcaster.StartStructuredLogging(3)
 		eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
 		r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
-		r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
+		r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", opts.Reason))
 	}
 	return true
 }
diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go
index f4dc13bf6..8b99295a1 100644
--- a/pkg/descheduler/strategies/duplicates.go
+++ b/pkg/descheduler/strategies/duplicates.go
@@ -200,7 +200,7 @@ func RemoveDuplicatePods(
 			// It's assumed all duplicated pods are in the same priority class
 			// TODO(jchaloup): check if the pod has a different node to lend to
 			for _, pod := range pods[upperAvg-1:] {
-				podEvictor.EvictPod(ctx, pod)
+				podEvictor.EvictPod(ctx, pod, evictions.EvictOptions{})
 				if podEvictor.NodeLimitExceeded(nodeMap[nodeName]) {
 					continue loop
 				}
diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go
index 0dac0c0c0..e12eb4c82 100644
--- a/pkg/descheduler/strategies/failedpods.go
+++ b/pkg/descheduler/strategies/failedpods.go
@@ -75,7 +75,7 @@ func RemoveFailedPods(
 			continue
 		}

-		podEvictor.EvictPod(ctx, pods[i])
+		podEvictor.EvictPod(ctx, pods[i], evictions.EvictOptions{})
 		if podEvictor.NodeLimitExceeded(node) {
 			continue
 		}
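The strategy call sites above and below pass an empty evictions.EvictOptions{} for now. A strategy that wants its reason recorded would populate Reason, which EvictPod threads into its log lines and the emitted event; at a real call site the options are passed as the third argument to podEvictor.EvictPod. A hedged sketch of the resulting event text, with a hypothetical reason string:

package main

import (
	"fmt"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

func main() {
	// Hypothetical reason text; the leading space matters because EvictPod formats
	// the event message as "pod evicted by sigs.k8s.io/descheduler%s".
	opts := evictions.EvictOptions{Reason: " (exceeded restart threshold)"}
	fmt.Printf("pod evicted by sigs.k8s.io/descheduler%s\n", opts.Reason)
	// Output: pod evicted by sigs.k8s.io/descheduler (exceeded restart threshold)
}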
diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go
index 39145b095..5e3b4fec3 100644
--- a/pkg/descheduler/strategies/node_affinity.go
+++ b/pkg/descheduler/strategies/node_affinity.go
@@ -24,7 +24,6 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
-
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
@@ -94,7 +93,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
 			for _, pod := range pods {
 				if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
 					klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
-					podEvictor.EvictPod(ctx, pod)
+					podEvictor.EvictPod(ctx, pod, evictions.EvictOptions{})
 					if podEvictor.NodeLimitExceeded(node) {
 						continue loop
 					}
diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go
index 48cdcf48c..2265058a5 100644
--- a/pkg/descheduler/strategies/node_taint.go
+++ b/pkg/descheduler/strategies/node_taint.go
@@ -107,7 +107,7 @@ loop:
 				taintFilterFnc,
 			) {
 				klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
-				podEvictor.EvictPod(ctx, pods[i])
+				podEvictor.EvictPod(ctx, pods[i], evictions.EvictOptions{})
 				if podEvictor.NodeLimitExceeded(node) {
 					continue loop
 				}
diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
index 5790141f9..b7b98879e 100644
--- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
+++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go
@@ -313,7 +313,7 @@ func evictPods(
 			continue
 		}

-		if podEvictor.EvictPod(ctx, pod) {
+		if podEvictor.EvictPod(ctx, pod, evictions.EvictOptions{}) {
 			klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))

 			for name := range totalAvailableUsage {
diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go
index b13418c13..c590bfb67 100644
--- a/pkg/descheduler/strategies/pod_antiaffinity.go
+++ b/pkg/descheduler/strategies/pod_antiaffinity.go
@@ -87,7 +87,7 @@ loop:
 		totalPods := len(pods)
 		for i := 0; i < totalPods; i++ {
 			if checkPodsWithAntiAffinityExist(pods[i], pods) && evictorFilter.Filter(pods[i]) {
-				if podEvictor.EvictPod(ctx, pods[i]) {
+				if podEvictor.EvictPod(ctx, pods[i], evictions.EvictOptions{}) {
 					// Since the current pod is evicted all other pods which have anti-affinity with this
 					// pod need not be evicted.
 					// Update pods.
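Each strategy updated above and below follows the same call-site shape: evict with an (empty) EvictOptions, then check the per-node limit before continuing. A simplified, runnable sketch of that pattern; the evictor interface, the string-typed pods, and the fake implementation are stand-ins for this illustration, not the real *evictions.PodEvictor.

package main

import (
	"context"
	"fmt"
)

// EvictOptions mirrors the struct added to evictions.go in this patch.
type EvictOptions struct {
	Reason string
}

// evictor is a stand-in for the small subset of *evictions.PodEvictor the strategies use.
type evictor interface {
	EvictPod(ctx context.Context, pod string, opts EvictOptions) bool
	NodeLimitExceeded(node string) bool
}

// evictCandidates shows the shared loop shape: evict, then stop for this node
// once the per-node eviction limit is reached.
func evictCandidates(ctx context.Context, pe evictor, node string, pods []string) {
	for _, pod := range pods {
		pe.EvictPod(ctx, pod, EvictOptions{})
		if pe.NodeLimitExceeded(node) {
			return // the real strategies `continue loop` to the next node
		}
	}
}

// fakeEvictor lets the sketch run without a Kubernetes client; it reports the
// node limit as exceeded after two evictions.
type fakeEvictor struct{ evicted int }

func (f *fakeEvictor) EvictPod(ctx context.Context, pod string, opts EvictOptions) bool {
	f.evicted++
	return true
}

func (f *fakeEvictor) NodeLimitExceeded(node string) bool { return f.evicted >= 2 }

func main() {
	f := &fakeEvictor{}
	evictCandidates(context.Background(), f, "node-1", []string{"p1", "p2", "p3"})
	fmt.Println("evicted:", f.evicted) // evicted: 2
}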
diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go
index 6149ea27a..3c84179c9 100644
--- a/pkg/descheduler/strategies/pod_lifetime.go
+++ b/pkg/descheduler/strategies/pod_lifetime.go
@@ -131,7 +131,7 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
 		if nodeLimitExceeded[pod.Spec.NodeName] {
 			continue
 		}
-		if podEvictor.EvictPod(ctx, pod) {
+		if podEvictor.EvictPod(ctx, pod, evictions.EvictOptions{}) {
 			klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds)
 		}
 		if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go
index 914046623..c144c4b54 100644
--- a/pkg/descheduler/strategies/toomanyrestarts.go
+++ b/pkg/descheduler/strategies/toomanyrestarts.go
@@ -90,7 +90,7 @@ loop:
 		} else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
 			continue
 		}
-		podEvictor.EvictPod(ctx, pods[i])
+		podEvictor.EvictPod(ctx, pods[i], evictions.EvictOptions{})
 		if podEvictor.NodeLimitExceeded(node) {
 			continue loop
 		}
diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go
index dbd903fbe..37c5c9820 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint.go
@@ -191,7 +191,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
 			if !isEvictable(pod) {
 				continue
 			}
-			podEvictor.EvictPod(ctx, pod)
+			podEvictor.EvictPod(ctx, pod, evictions.EvictOptions{})
 			if podEvictor.NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
 				nodeLimitExceeded[pod.Spec.NodeName] = true
 			}
@@ -240,7 +240,7 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
 // whichever number is less.
 //
 // (Note, we will only move as many pods from a domain as possible without bringing it below the ideal average,
-// and we will not bring any smaller domain above the average)
+// and we will not bring any smaller domain above the average)
 // If the diff is within the skew, we move to the next highest domain.
 // If the higher domain can't give any more without falling below the average, we move to the next lowest "high" domain
 //
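The comment at the end of topologyspreadconstraint.go above describes how many pods may move from a higher domain to a smaller one: no more than would pull the high domain below the ideal average, and no more than would push the low domain above it, whichever number is less. A hedged arithmetic sketch of that rule with hypothetical domain sizes; it illustrates the comment, not the exact implementation in this file.

package main

import "fmt"

// movablePods applies the rule described above: move at most the smaller of
// (a) how far the high domain sits above the ideal average and
// (b) how far the low domain sits below it, so neither domain crosses the average.
func movablePods(high, low int, idealAvg float64) int {
	aboveAvg := float64(high) - idealAvg // pods the high domain can give up
	belowAvg := idealAvg - float64(low)  // pods the low domain can absorb
	if aboveAvg < belowAvg {
		return int(aboveAvg)
	}
	return int(belowAvg)
}

func main() {
	// Hypothetical sizes: a domain with 6 pods, a domain with 2 pods, ideal average 3
	// (the average is taken over all domains, so it need not be the midpoint of these two).
	fmt.Println(movablePods(6, 2, 3)) // min(6-3, 3-2) = 1
}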