diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index 7a5c71234..d04d9b523 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -77,41 +77,6 @@ func NewPodEvictor(
 	}
 }
-// IsEvictable checks if a pod is evictable or not.
-func (pe *PodEvictor) IsEvictable(pod *v1.Pod, thresholdPriority int32) bool {
-	checkErrs := []error{}
-	if IsCriticalPod(pod) {
-		checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
-	}
-
-	if !IsPodEvictableBasedOnPriority(pod, thresholdPriority) {
-		checkErrs = append(checkErrs, fmt.Errorf("pod is not evictable due to its priority"))
-	}
-
-	ownerRefList := podutil.OwnerRef(pod)
-	if IsDaemonsetPod(ownerRefList) {
-		checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
-	}
-
-	if len(ownerRefList) == 0 {
-		checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
-	}
-
-	if !pe.evictLocalStoragePods && IsPodWithLocalStorage(pod) {
-		checkErrs = append(checkErrs, fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods"))
-	}
-
-	if IsMirrorPod(pod) {
-		checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
-	}
-
-	if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
-		klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error())
-		return false
-	}
-	return true
-}
-
 // NodeEvicted gives a number of pods evicted for node
 func (pe *PodEvictor) NodeEvicted(node *v1.Node) int {
 	return pe.nodepodCount[node]
 }
@@ -187,6 +152,87 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
 	return err
 }
 
+type Options struct {
+	priority *int32
+}
+
+// WithPriorityThreshold sets a threshold for pod's priority class.
+// Any pod whose priority class is lower is evictable.
+func WithPriorityThreshold(priority int32) func(opts *Options) {
+	return func(opts *Options) {
+		var p int32 = priority
+		opts.priority = &p
+	}
+}
+
+type constraint func(pod *v1.Pod) error
+
+type evictable struct {
+	constraints []constraint
+}
+
+// Evictable provides an implementation of IsEvictable(IsEvictable(pod *v1.Pod) bool).
+// The method accepts a list of options which allow to extend constraints
+// which decides when a pod is considered evictable.
+func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
+	options := &Options{}
+	for _, opt := range opts {
+		opt(options)
+	}
+
+	ev := &evictable{}
+	if !pe.evictLocalStoragePods {
+		ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
+			if IsPodWithLocalStorage(pod) {
+				return fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods")
+			}
+			return nil
+		})
+	}
+	if options.priority != nil {
+		ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
+			if IsPodEvictableBasedOnPriority(pod, *options.priority) {
+				return nil
+			}
+			return fmt.Errorf("pod has higher priority than specified priority class threshold")
+		})
+	}
+	return ev
+}
+
+// IsEvictable decides when a pod is evictable
+func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
+	checkErrs := []error{}
+	if IsCriticalPod(pod) {
+		checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
+	}
+
+	ownerRefList := podutil.OwnerRef(pod)
+	if IsDaemonsetPod(ownerRefList) {
+		checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
+	}
+
+	if len(ownerRefList) == 0 {
+		checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
+	}
+
+	if IsMirrorPod(pod) {
+		checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
+	}
+
+	for _, c := range ev.constraints {
+		if err := c(pod); err != nil {
+			checkErrs = append(checkErrs, err)
+		}
+	}
+
+	if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
+		klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error())
+		return false
+	}
+	return true
+}
+
 func IsCriticalPod(pod *v1.Pod) bool {
 	return utils.IsCriticalPod(pod)
 }
@@ -223,5 +269,5 @@ func IsPodWithLocalStorage(pod *v1.Pod) bool {
 
 // IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
 func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
-	return pod.Spec.Priority == nil || (*pod.Spec.Priority < priority && *pod.Spec.Priority < utils.SystemCriticalPriority)
+	return pod.Spec.Priority == nil || *pod.Spec.Priority < priority
 }
diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go
index 81f3155f5..20d770ce8 100644
--- a/pkg/descheduler/evictions/evictions_test.go
+++ b/pkg/descheduler/evictions/evictions_test.go
@@ -241,14 +241,17 @@ func TestIsEvictable(t *testing.T) {
 
 	for _, test := range testCases {
 		test.runBefore(test.pod)
+
 		podEvictor := &PodEvictor{
 			evictLocalStoragePods: test.evictLocalStoragePods,
 		}
-		testPriorityThreshold := utils.SystemCriticalPriority
+
+		evictable := podEvictor.Evictable()
 		if test.priorityThreshold != nil {
-			testPriorityThreshold = *test.priorityThreshold
+			evictable = podEvictor.Evictable(WithPriorityThreshold(*test.priorityThreshold))
 		}
-		result := podEvictor.IsEvictable(test.pod, testPriorityThreshold)
+
+		result := evictable.IsEvictable(test.pod)
 		if result != test.result {
 			t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
 		}
diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go
index cbb4f8c19..30ce1fe85 100644
--- a/pkg/descheduler/pod/pods.go
+++ b/pkg/descheduler/pod/pods.go
@@ -57,8 +57,8 @@ func WithoutNamespaces(namespaces []string) func(opts *Options) {
 
 // ListPodsOnANode lists all of the pods on a node
 // It also accepts an optional "filter" function which can be used to further limit the pods that are returned.
-// (Usually this is podEvictor.IsEvictable, in order to only list the evictable pods on a node, but can
-// be used by strategies to extend IsEvictable if there are further restrictions, such as with NodeAffinity).
+// (Usually this is podEvictor.Evictable().IsEvictable, in order to only list the evictable pods on a node, but can
+// be used by strategies to extend it if there are further restrictions, such as with NodeAffinity).
 func ListPodsOnANode(
 	ctx context.Context,
 	client clientset.Interface,
diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go
index 7aabd4208..53ba26998 100644
--- a/pkg/descheduler/strategies/duplicates.go
+++ b/pkg/descheduler/strategies/duplicates.go
@@ -67,11 +67,11 @@ func RemoveDuplicatePods(
 		return
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v", node.Name)
-		pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(func(pod *v1.Pod) bool {
-			return podEvictor.IsEvictable(pod, thresholdPriority)
-		}))
+		pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(evictable.IsEvictable))
 		if err != nil {
 			klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
 			continue
diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go
index 2a58101bf..7e260881c 100644
--- a/pkg/descheduler/strategies/lownodeutilization.go
+++ b/pkg/descheduler/strategies/lownodeutilization.go
@@ -126,13 +126,15 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 		targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
 	klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	evictPodsFromTargetNodes(
 		ctx,
 		targetNodes,
 		lowNodes,
 		targetThresholds,
 		podEvictor,
-		thresholdPriority)
+		evictable.IsEvictable)
 
 	klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
 }
@@ -212,7 +214,7 @@ func evictPodsFromTargetNodes(
 	targetNodes, lowNodes []NodeUsageMap,
 	targetThresholds api.ResourceThresholds,
 	podEvictor *evictions.PodEvictor,
-	thresholdPriority int32,
+	podFilter func(pod *v1.Pod) bool,
 ) {
 	sortNodesByUsage(targetNodes)
 
@@ -249,7 +251,7 @@ func evictPodsFromTargetNodes(
 		}
 
 		klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
-		nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor, thresholdPriority)
+		nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
 		klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, removablePods:%v", len(node.allPods), len(nonRemovablePods), len(removablePods))
 
 		if len(removablePods) == 0 {
@@ -410,11 +412,11 @@ func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds {
 	}
 }
 
-func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor, thresholdPriority int32) ([]*v1.Pod, []*v1.Pod) {
+func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
 	var nonRemovablePods, removablePods []*v1.Pod
 	for _, pod := range pods {
-		if !evictor.IsEvictable(pod, thresholdPriority) {
+		if !filter(pod) {
 			nonRemovablePods = append(nonRemovablePods, pod)
 		} else {
 			removablePods = append(removablePods, pod)
diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go
index dc66a1431..a17ac191a 100644
--- a/pkg/descheduler/strategies/node_affinity.go
+++ b/pkg/descheduler/strategies/node_affinity.go
@@ -64,6 +64,8 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
 		excludedNamespaces = strategy.Params.Namespaces.Exclude
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, nodeAffinity := range strategy.Params.NodeAffinityType {
 		klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
 
@@ -77,7 +79,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
 				client,
 				node,
 				podutil.WithFilter(func(pod *v1.Pod) bool {
-					return podEvictor.IsEvictable(pod, thresholdPriority) &&
+					return evictable.IsEvictable(pod) &&
 						!nodeutil.PodFitsCurrentNode(pod, node) &&
 						nodeutil.PodFitsAnyNode(pod, nodes)
 				}),
diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go
index 27f528bf1..3d697864b 100644
--- a/pkg/descheduler/strategies/node_taint.go
+++ b/pkg/descheduler/strategies/node_taint.go
@@ -65,15 +65,15 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
 		return
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v\n", node.Name)
 		pods, err := podutil.ListPodsOnANode(
 			ctx,
 			client,
 			node,
-			podutil.WithFilter(func(pod *v1.Pod) bool {
-				return podEvictor.IsEvictable(pod, thresholdPriority)
-			}),
+			podutil.WithFilter(evictable.IsEvictable),
 			podutil.WithNamespaces(includedNamespaces),
 			podutil.WithoutNamespaces(excludedNamespaces),
 		)
diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go
index ce2330f5e..8a2a3b072 100644
--- a/pkg/descheduler/strategies/pod_antiaffinity.go
+++ b/pkg/descheduler/strategies/pod_antiaffinity.go
@@ -66,15 +66,15 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
 		return
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v\n", node.Name)
 		pods, err := podutil.ListPodsOnANode(
 			ctx,
 			client,
 			node,
-			podutil.WithFilter(func(pod *v1.Pod) bool {
-				return podEvictor.IsEvictable(pod, thresholdPriority)
-			}),
+			podutil.WithFilter(evictable.IsEvictable),
 			podutil.WithNamespaces(includedNamespaces),
 			podutil.WithoutNamespaces(excludedNamespaces),
 		)
diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go
index ac5dc2707..8a984b77d 100644
--- a/pkg/descheduler/strategies/pod_lifetime.go
+++ b/pkg/descheduler/strategies/pod_lifetime.go
@@ -66,10 +66,12 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
 		excludedNamespaces = strategy.Params.Namespaces.Exclude
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %#v", node.Name)
-		pods := listOldPodsOnNode(ctx, client, node, includedNamespaces, excludedNamespaces, *strategy.Params.MaxPodLifeTimeSeconds, podEvictor, thresholdPriority)
+		pods := listOldPodsOnNode(ctx, client, node, includedNamespaces, excludedNamespaces, *strategy.Params.MaxPodLifeTimeSeconds, evictable.IsEvictable)
 		for _, pod := range pods {
 			success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime")
 			if success {
@@ -85,14 +87,12 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
 	}
 }
 
-func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, includedNamespaces, excludedNamespaces []string, maxPodLifeTimeSeconds uint, podEvictor *evictions.PodEvictor, thresholdPriority int32) []*v1.Pod {
+func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, includedNamespaces, excludedNamespaces []string, maxPodLifeTimeSeconds uint, filter func(pod *v1.Pod) bool) []*v1.Pod {
 	pods, err := podutil.ListPodsOnANode(
 		ctx,
 		client,
 		node,
-		podutil.WithFilter(func(pod *v1.Pod) bool {
-			return podEvictor.IsEvictable(pod, thresholdPriority)
-		}),
+		podutil.WithFilter(filter),
 		podutil.WithNamespaces(includedNamespaces),
 		podutil.WithoutNamespaces(excludedNamespaces),
 	)
diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go
index 96318f20a..94e620f54 100644
--- a/pkg/descheduler/strategies/toomanyrestarts.go
+++ b/pkg/descheduler/strategies/toomanyrestarts.go
@@ -67,15 +67,15 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter
 		excludedNamespaces = strategy.Params.Namespaces.Exclude
 	}
 
+	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
+
 	for _, node := range nodes {
 		klog.V(1).Infof("Processing node: %s", node.Name)
 		pods, err := podutil.ListPodsOnANode(
 			ctx,
 			client,
 			node,
-			podutil.WithFilter(func(pod *v1.Pod) bool {
-				return podEvictor.IsEvictable(pod, thresholdPriority)
-			}),
+			podutil.WithFilter(evictable.IsEvictable),
 			podutil.WithNamespaces(includedNamespaces),
 			podutil.WithoutNamespaces(excludedNamespaces),
 		)
diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go
index 0beaa6610..011181ea2 100644
--- a/pkg/utils/pod.go
+++ b/pkg/utils/pod.go
@@ -109,9 +109,15 @@ func IsCriticalPod(pod *v1.Pod) bool {
 	if IsStaticPod(pod) {
 		return true
 	}
+
 	if IsMirrorPod(pod) {
 		return true
 	}
+
+	if pod.Spec.Priority != nil && *pod.Spec.Priority >= SystemCriticalPriority {
+		return true
+	}
+
 	return false
 }
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 91815f8a8..eaea2d69d 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -25,8 +25,6 @@ import (
 	"testing"
 	"time"
 
-	"sigs.k8s.io/descheduler/pkg/utils"
-
 	v1 "k8s.io/api/core/v1"
 	schedulingv1 "k8s.io/api/scheduling/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -654,10 +652,7 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface,
 			continue
 		}
 		// List all the pods on the current Node
-		podsOnANode, err := podutil.ListPodsOnANode(ctx, clientSet, node,
-			podutil.WithFilter(func(pod *v1.Pod) bool {
-				return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority)
-			}))
+		podsOnANode, err := podutil.ListPodsOnANode(ctx, clientSet, node, podutil.WithFilter(podEvictor.Evictable().IsEvictable))
 		if err != nil {
 			t.Errorf("Error listing pods on a node %v", err)
 		}
@@ -669,10 +664,7 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface,
 	}
 	t.Log("Eviction of pods starting")
 	startEndToEndForLowNodeUtilization(ctx, clientSet, nodeInformer, podEvictor)
-	podsOnleastUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, leastLoadedNode,
-		podutil.WithFilter(func(pod *v1.Pod) bool {
-			return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority)
-		}))
+	podsOnleastUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, leastLoadedNode, podutil.WithFilter(podEvictor.Evictable().IsEvictable))
 	if err != nil {
 		t.Errorf("Error listing pods on a node %v", err)
 	}
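
Reviewer note (not part of the diff): the sketch below shows how a strategy is expected to consume the new API after this change — build the filter once with Evictable(), opt into the priority check with WithPriorityThreshold, and hand IsEvictable to podutil.WithFilter. The package name "example" and the helper listEvictablePods are hypothetical and only for illustration; the descheduler import paths and call signatures are the ones already used in the hunks above.

// Hypothetical helper (illustration only) mirroring the post-PR pattern used by
// the strategies touched in this diff.
package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// listEvictablePods builds the evictability filter once per strategy run and
// reuses it for every node, instead of passing thresholdPriority around.
func listEvictablePods(ctx context.Context, client clientset.Interface, nodes []*v1.Node, podEvictor *evictions.PodEvictor, thresholdPriority int32) map[string][]*v1.Pod {
	// The built-in checks (critical, DaemonSet, mirror, missing ownerrefs) always
	// apply; the priority threshold is now an opt-in constraint.
	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))

	podsPerNode := map[string][]*v1.Pod{}
	for _, node := range nodes {
		pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(evictable.IsEvictable))
		if err != nil {
			klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
			continue
		}
		podsPerNode[node.Name] = pods
	}
	return podsPerNode
}

Building the filter once outside the node loop also means strategies that need extra conditions can still wrap evictable.IsEvictable in their own closure, as node_affinity.go does above.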