From b5d7219391df61d15f1b7f1b1e56a71f8a29d68a Mon Sep 17 00:00:00 2001 From: RyanDevlin Date: Wed, 10 Mar 2021 16:23:52 -0500 Subject: [PATCH] Completed evictSystemCriticalPods feature --- .gitignore | 1 + README.md | 13 +- pkg/api/types.go | 3 + pkg/api/v1alpha1/types.go | 3 + pkg/descheduler/descheduler.go | 9 + pkg/descheduler/evictions/evictions.go | 113 ++++----- pkg/descheduler/evictions/evictions_test.go | 193 ++++++++++----- pkg/descheduler/strategies/duplicates_test.go | 2 + .../strategies/lownodeutilization_test.go | 2 + .../strategies/node_affinity_test.go | 3 +- pkg/descheduler/strategies/node_taint_test.go | 26 +- .../strategies/pod_antiaffinity_test.go | 3 +- .../strategies/pod_lifetime_test.go | 1 + .../strategies/toomanyrestarts_test.go | 3 +- .../topologyspreadconstraint_test.go | 4 +- pkg/utils/pod.go | 56 +++-- test/e2e/e2e_test.go | 233 ++++++++++++++---- test/run-e2e-tests.sh | 2 +- 18 files changed, 462 insertions(+), 208 deletions(-) diff --git a/.gitignore b/.gitignore index 7eb986f87..acf52a5cd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ _tmp/ vendordiff.patch .idea/ *.code-workspace +.vscode/ diff --git a/README.md b/README.md index a5bb74da0..235e73881 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,8 @@ parameters associated with the strategies can be configured too. By default, all The policy also includes common configuration for all the strategies: - `nodeSelector` - limiting the nodes which are processed -- `evictLocalStoragePods` - allowing to evict pods with local storage +- `evictLocalStoragePods` - allows eviction of pods with local storage +- `evictSystemCriticalPods` - [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns - `ignorePvcPods` - set whether PVC pods should be evicted or ignored (defaults to `false`) - `maxNoOfPodsToEvictPerNode` - maximum number of pods evicted from each node (summed through all strategies) @@ -122,6 +123,7 @@ apiVersion: "descheduler/v1alpha1" kind: "DeschedulerPolicy" nodeSelector: prod=dev evictLocalStoragePods: true +evictSystemCriticalPods: true maxNoOfPodsToEvictPerNode: 40 ignorePvcPods: false strategies: @@ -480,6 +482,9 @@ All strategies are able to configure a priority threshold, only pods under the t specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold is set to the value of `system-cluster-critical` priority class. + +Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely. + E.g. Setting `thresholdPriority` @@ -547,12 +552,12 @@ strategies: When the descheduler decides to evict pods from a node, it employs the following general mechanism: -* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted. +* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set). * Pods (static or mirrored pods or stand alone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are never evicted because these pods won't be recreated. 
* Pods associated with DaemonSets are never evicted. -* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set) -* Pods with PVCs are evicted unless `ignorePvcPods: true` is set. +* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set). +* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set). * In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority, best effort pods are evicted before burstable and guaranteed pods. * All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This diff --git a/pkg/api/types.go b/pkg/api/types.go index e545d8861..458f54f98 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -35,6 +35,9 @@ type DeschedulerPolicy struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods *bool + // EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods) + EvictSystemCriticalPods *bool + // IgnorePVCPods prevents pods with PVCs from being evicted. IgnorePVCPods *bool diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index 3585addab..e67beff32 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -35,6 +35,9 @@ type DeschedulerPolicy struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"` + // EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods) + EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"` + // IgnorePVCPods prevents pods with PVCs from being evicted. IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"` diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 36a6482b5..8eae92b9f 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -93,6 +93,14 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods } + evictSystemCriticalPods := false + if deschedulerPolicy.EvictSystemCriticalPods != nil { + evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods + if evictSystemCriticalPods { + klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. 
This could cause eviction of Kubernetes system pods.") + } + } + ignorePvcPods := false if deschedulerPolicy.IgnorePVCPods != nil { ignorePvcPods = *deschedulerPolicy.IgnorePVCPods @@ -124,6 +132,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer maxNoOfPodsToEvictPerNode, nodes, evictLocalStoragePods, + evictSystemCriticalPods, ignorePvcPods, ) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index ae7c7a55a..4ec78bd08 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -46,13 +46,14 @@ const ( type nodePodEvictedCount map[*v1.Node]int type PodEvictor struct { - client clientset.Interface - policyGroupVersion string - dryRun bool - maxPodsToEvictPerNode int - nodepodCount nodePodEvictedCount - evictLocalStoragePods bool - ignorePvcPods bool + client clientset.Interface + policyGroupVersion string + dryRun bool + maxPodsToEvictPerNode int + nodepodCount nodePodEvictedCount + evictLocalStoragePods bool + evictSystemCriticalPods bool + ignorePvcPods bool } func NewPodEvictor( @@ -62,6 +63,7 @@ func NewPodEvictor( maxPodsToEvictPerNode int, nodes []*v1.Node, evictLocalStoragePods bool, + evictSystemCriticalPods bool, ignorePvcPods bool, ) *PodEvictor { var nodePodCount = make(nodePodEvictedCount) @@ -71,13 +73,14 @@ func NewPodEvictor( } return &PodEvictor{ - client: client, - policyGroupVersion: policyGroupVersion, - dryRun: dryRun, - maxPodsToEvictPerNode: maxPodsToEvictPerNode, - nodepodCount: nodePodCount, - evictLocalStoragePods: evictLocalStoragePods, - ignorePvcPods: ignorePvcPods, + client: client, + policyGroupVersion: policyGroupVersion, + dryRun: dryRun, + maxPodsToEvictPerNode: maxPodsToEvictPerNode, + nodepodCount: nodePodCount, + evictLocalStoragePods: evictLocalStoragePods, + evictSystemCriticalPods: evictSystemCriticalPods, + ignorePvcPods: ignorePvcPods, } } @@ -188,42 +191,50 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable { } ev := &evictable{} + if !pe.evictSystemCriticalPods { + ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { + // Moved from IsEvictable function to allow for disabling + if utils.IsCriticalPriorityPod(pod) { + return fmt.Errorf("pod has system critical priority") + } + return nil + }) + + if options.priority != nil { + ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { + if IsPodEvictableBasedOnPriority(pod, *options.priority) { + return nil + } + return fmt.Errorf("pod has higher priority than specified priority class threshold") + }) + } + } if !pe.evictLocalStoragePods { ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { - if IsPodWithLocalStorage(pod) { - return fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods") + if utils.IsPodWithLocalStorage(pod) { + return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods") } return nil }) } if pe.ignorePvcPods { ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { - if IsPodWithPVC(pod) { + if utils.IsPodWithPVC(pod) { return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods") } return nil }) } - if options.priority != nil { - ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { - if IsPodEvictableBasedOnPriority(pod, *options.priority) { - return nil - } - return fmt.Errorf("pod has higher priority than specified priority class threshold") - }) - } + return ev } // 
IsEvictable decides when a pod is evictable func (ev *evictable) IsEvictable(pod *v1.Pod) bool { checkErrs := []error{} - if IsCriticalPod(pod) { - checkErrs = append(checkErrs, fmt.Errorf("pod is critical")) - } ownerRefList := podutil.OwnerRef(pod) - if IsDaemonsetPod(ownerRefList) { + if utils.IsDaemonsetPod(ownerRefList) { checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod")) } @@ -231,10 +242,14 @@ func (ev *evictable) IsEvictable(pod *v1.Pod) bool { checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs")) } - if IsMirrorPod(pod) { + if utils.IsMirrorPod(pod) { checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod")) } + if utils.IsStaticPod(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod")) + } + for _, c := range ev.constraints { if err := c(pod); err != nil { checkErrs = append(checkErrs, err) @@ -245,52 +260,16 @@ func (ev *evictable) IsEvictable(pod *v1.Pod) bool { klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error()) return false } + return true } -func IsCriticalPod(pod *v1.Pod) bool { - return utils.IsCriticalPod(pod) -} - -func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { - for _, ownerRef := range ownerRefList { - if ownerRef.Kind == "DaemonSet" { - return true - } - } - return false -} - -// IsMirrorPod checks whether the pod is a mirror pod. -func IsMirrorPod(pod *v1.Pod) bool { - return utils.IsMirrorPod(pod) -} - // HaveEvictAnnotation checks if the pod have evict annotation func HaveEvictAnnotation(pod *v1.Pod) bool { _, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey] return found } -func IsPodWithLocalStorage(pod *v1.Pod) bool { - for _, volume := range pod.Spec.Volumes { - if volume.HostPath != nil || volume.EmptyDir != nil { - return true - } - } - - return false -} - -func IsPodWithPVC(pod *v1.Pod) bool { - for _, volume := range pod.Spec.Volumes { - if volume.PersistentVolumeClaim != nil { - return true - } - } - return false -} - // IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec. 
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool { return pod.Spec.Priority == nil || *pod.Spec.Priority < priority diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index 986a3247a..fb4bf4d74 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -74,45 +74,67 @@ func TestIsEvictable(t *testing.T) { lowPriority := int32(800) highPriority := int32(900) type testCase struct { - pod *v1.Pod - runBefore func(*v1.Pod) - evictLocalStoragePods bool - priorityThreshold *int32 - result bool + pod *v1.Pod + runBefore func(*v1.Pod) + evictLocalStoragePods bool + evictSystemCriticalPods bool + priorityThreshold *int32 + result bool } testCases := []testCase{ - { + { // Normal pod eviction with normal ownerRefs pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Normal pod eviction with replicaSet ownerRefs pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Normal pod eviction with statefulSet ownerRefs + pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation + pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() @@ -127,9 +149,10 @@ func TestIsEvictable(t *testing.T) { }, } }, - evictLocalStoragePods: false, - result: false, - }, { + 
evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: false, + }, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() @@ -144,9 +167,10 @@ func TestIsEvictable(t *testing.T) { }, } }, - evictLocalStoragePods: true, - result: true, - }, { + evictLocalStoragePods: true, + evictSystemCriticalPods: false, + result: true, + }, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} @@ -162,50 +186,56 @@ func TestIsEvictable(t *testing.T) { }, } }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Pod not evicted becasuse it is part of a daemonSet pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() }, - evictLocalStoragePods: false, - result: false, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: false, + }, { // Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Pod not evicted becasuse it is a mirror pod pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.Annotations = test.GetMirrorPodAnnotation() }, - evictLocalStoragePods: false, - result: false, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: false, + }, { // Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.Annotations = test.GetMirrorPodAnnotation() pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Pod not evicted becasuse it has system critical priority pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() priority := utils.SystemCriticalPriority pod.Spec.Priority = &priority }, - evictLocalStoragePods: false, - result: false, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: false, + }, { // Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() @@ -215,42 +245,72 @@ func TestIsEvictable(t *testing.T) { 
"descheduler.alpha.kubernetes.io/evict": "true", } }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + result: true, + }, { // Pod not evicted becasuse it has a priority higher than the configured priority threshold pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.Spec.Priority = &highPriority }, - evictLocalStoragePods: false, - priorityThreshold: &lowPriority, - result: false, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + priorityThreshold: &lowPriority, + result: false, + }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} pod.Spec.Priority = &highPriority }, - evictLocalStoragePods: false, - priorityThreshold: &lowPriority, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + priorityThreshold: &lowPriority, + result: true, + }, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority }, - evictLocalStoragePods: false, - result: true, - }, { + evictLocalStoragePods: false, + evictSystemCriticalPods: true, + result: true, + }, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation + pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: true, + result: true, + }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), runBefore: func(pod *v1.Pod) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Priority = &highPriority }, - evictLocalStoragePods: false, - result: true, + evictLocalStoragePods: false, + evictSystemCriticalPods: true, + priorityThreshold: &lowPriority, + result: true, + }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation + pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.Spec.Priority = &highPriority + }, + 
evictLocalStoragePods: false, + evictSystemCriticalPods: true, + priorityThreshold: &lowPriority, + result: true, }, } @@ -258,7 +318,8 @@ func TestIsEvictable(t *testing.T) { test.runBefore(test.pod) podEvictor := &PodEvictor{ - evictLocalStoragePods: test.evictLocalStoragePods, + evictLocalStoragePods: test.evictLocalStoragePods, + evictSystemCriticalPods: test.evictSystemCriticalPods, } evictable := podEvictor.Evictable() @@ -301,18 +362,18 @@ func TestPodTypes(t *testing.T) { } // A Mirror Pod. p4.Annotations = test.GetMirrorPodAnnotation() - if !IsMirrorPod(p4) { + if !utils.IsMirrorPod(p4) { t.Errorf("Expected p4 to be a mirror pod.") } - if !IsPodWithLocalStorage(p3) { + if !utils.IsPodWithLocalStorage(p3) { t.Errorf("Expected p3 to be a pod with local storage.") } ownerRefList := podutil.OwnerRef(p2) - if !IsDaemonsetPod(ownerRefList) { + if !utils.IsDaemonsetPod(ownerRefList) { t.Errorf("Expected p2 to be a daemonset pod.") } ownerRefList = podutil.OwnerRef(p1) - if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) { + if utils.IsDaemonsetPod(ownerRefList) || utils.IsPodWithLocalStorage(p1) || utils.IsCriticalPriorityPod(p1) || utils.IsMirrorPod(p1) || utils.IsStaticPod(p1) { t.Errorf("Expected p1 to be a normal pod.") } diff --git a/pkg/descheduler/strategies/duplicates_test.go b/pkg/descheduler/strategies/duplicates_test.go index 8954feefb..097018f25 100644 --- a/pkg/descheduler/strategies/duplicates_test.go +++ b/pkg/descheduler/strategies/duplicates_test.go @@ -211,6 +211,7 @@ func TestFindDuplicatePods(t *testing.T) { []*v1.Node{node1, node2}, false, false, + false, ) RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node1, node2}, podEvictor) @@ -407,6 +408,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { testCase.nodes, false, false, + false, ) RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go index 01ea264a1..a7a30918f 100644 --- a/pkg/descheduler/strategies/lownodeutilization_test.go +++ b/pkg/descheduler/strategies/lownodeutilization_test.go @@ -446,6 +446,7 @@ func TestLowNodeUtilization(t *testing.T) { nodes, false, false, + false, ) strategy := api.DeschedulerStrategy{ @@ -767,6 +768,7 @@ func TestWithTaints(t *testing.T) { item.nodes, false, false, + false, ) LowNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go index ceb4772ab..1addccbc0 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ b/pkg/descheduler/strategies/node_affinity_test.go @@ -20,7 +20,7 @@ import ( "context" "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" @@ -159,6 +159,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { tc.nodes, false, false, + false, ) RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/node_taint_test.go b/pkg/descheduler/strategies/node_taint_test.go index c9515701e..c3c5ae681 100644 --- a/pkg/descheduler/strategies/node_taint_test.go +++ b/pkg/descheduler/strategies/node_taint_test.go @@ -5,7 +5,7 @@ import ( "fmt" "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" 
"k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" @@ -75,6 +75,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { p7.Namespace = "kube-system" priority := utils.SystemCriticalPriority p7.Spec.Priority = &priority + p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() // A daemonset. p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() @@ -102,6 +103,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { nodes []*v1.Node pods []v1.Pod evictLocalStoragePods bool + evictSystemCriticalPods bool maxPodsToEvictPerNode int expectedEvictedPodCount int }{ @@ -111,6 +113,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { pods: []v1.Pod{*p1, *p2, *p3}, nodes: []*v1.Node{node1}, evictLocalStoragePods: false, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 0, expectedEvictedPodCount: 1, //p2 gets evicted }, @@ -119,6 +122,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { pods: []v1.Pod{*p1, *p3, *p4}, nodes: []*v1.Node{node1}, evictLocalStoragePods: false, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 0, expectedEvictedPodCount: 1, //p4 gets evicted }, @@ -127,6 +131,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { pods: []v1.Pod{*p1, *p5, *p6}, nodes: []*v1.Node{node1}, evictLocalStoragePods: false, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 1, expectedEvictedPodCount: 1, //p5 or p6 gets evicted }, @@ -135,24 +140,36 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { pods: []v1.Pod{*p7, *p8, *p9, *p10}, nodes: []*v1.Node{node2}, evictLocalStoragePods: false, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 0, - expectedEvictedPodCount: 0, + expectedEvictedPodCount: 0, //nothing is evicted }, { description: "Critical pods except storage pods not tolerating node taint should not be evicted", pods: []v1.Pod{*p7, *p8, *p9, *p10}, nodes: []*v1.Node{node2}, evictLocalStoragePods: true, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 0, - expectedEvictedPodCount: 1, + expectedEvictedPodCount: 1, //p9 gets evicted }, { description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted", pods: []v1.Pod{*p7, *p8, *p10, *p11}, nodes: []*v1.Node{node2}, evictLocalStoragePods: false, + evictSystemCriticalPods: false, maxPodsToEvictPerNode: 0, - expectedEvictedPodCount: 1, + expectedEvictedPodCount: 1, //p11 gets evicted + }, + { + description: "Critical and non critical pods, pods not tolerating node taint should be evicted even if they are critical", + pods: []v1.Pod{*p2, *p7, *p9, *p10}, + nodes: []*v1.Node{node2}, + evictLocalStoragePods: false, + evictSystemCriticalPods: true, + maxPodsToEvictPerNode: 0, + expectedEvictedPodCount: 2, //p2 and p7 are evicted }, } @@ -171,6 +188,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { tc.maxPodsToEvictPerNode, tc.nodes, tc.evictLocalStoragePods, + tc.evictSystemCriticalPods, false, ) diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go index 5c9970a5e..abae525b6 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity_test.go +++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go @@ -20,7 +20,7 @@ import ( "context" "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" @@ -121,6 +121,7 @@ func TestPodAntiAffinity(t *testing.T) { []*v1.Node{node}, false, false, + false, ) 
RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, podEvictor) diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go index 8f3baf51e..69e91ad85 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -253,6 +253,7 @@ func TestPodLifeTime(t *testing.T) { tc.maxPodsToEvictPerNode, []*v1.Node{node}, false, + false, tc.ignorePvcPods, ) diff --git a/pkg/descheduler/strategies/toomanyrestarts_test.go b/pkg/descheduler/strategies/toomanyrestarts_test.go index 4f038ae9e..6e0eda339 100644 --- a/pkg/descheduler/strategies/toomanyrestarts_test.go +++ b/pkg/descheduler/strategies/toomanyrestarts_test.go @@ -22,7 +22,7 @@ import ( "fmt" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" @@ -173,6 +173,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { []*v1.Node{node}, false, false, + false, ) RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, []*v1.Node{node}, podEvictor) diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index f8c337033..c9c4354c1 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -3,9 +3,10 @@ package strategies import ( "context" "fmt" - "sigs.k8s.io/descheduler/pkg/api" "testing" + "sigs.k8s.io/descheduler/pkg/api" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -458,6 +459,7 @@ func TestTopologySpreadConstraint(t *testing.T) { tc.nodes, false, false, + false, ) RemovePodsViolatingTopologySpreadConstraint(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor) podsEvicted := podEvictor.TotalEvicted() diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go index 4ea72b0ca..288607d07 100644 --- a/pkg/utils/pod.go +++ b/pkg/utils/pod.go @@ -5,6 +5,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/component-base/featuregate" "k8s.io/klog/v2" @@ -82,7 +83,7 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou return requestQuantity } -// IsMirrorPod returns true if the passed Pod is a Mirror Pod. +// IsMirrorPod returns true if the pod is a Mirror Pod. func IsMirrorPod(pod *v1.Pod) bool { _, ok := pod.Annotations[v1.MirrorPodAnnotationKey] return ok @@ -94,6 +95,42 @@ func IsStaticPod(pod *v1.Pod) bool { return err == nil && source != "api" } +// IsCriticalPriorityPod returns true if the pod has critical priority. +func IsCriticalPriorityPod(pod *v1.Pod) bool { + return pod.Spec.Priority != nil && *pod.Spec.Priority >= SystemCriticalPriority +} + +// IsDaemonsetPod returns true if the pod is a IsDaemonsetPod. +func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { + for _, ownerRef := range ownerRefList { + if ownerRef.Kind == "DaemonSet" { + return true + } + } + return false +} + +// IsPodWithLocalStorage returns true if the pod has local storage. 
+func IsPodWithLocalStorage(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + if volume.HostPath != nil || volume.EmptyDir != nil { + return true + } + } + + return false +} + +// IsPodWithLocalStorage returns true if the pod has claimed a persistent volume. +func IsPodWithPVC(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + return true + } + } + return false +} + // GetPodSource returns the source of the pod based on the annotation. func GetPodSource(pod *v1.Pod) (string, error) { if pod.Annotations != nil { @@ -104,23 +141,6 @@ func GetPodSource(pod *v1.Pod) (string, error) { return "", fmt.Errorf("cannot get source of pod %q", pod.UID) } -// IsCriticalPod returns true if the pod is a static or mirror pod. -func IsCriticalPod(pod *v1.Pod) bool { - if IsStaticPod(pod) { - return true - } - - if IsMirrorPod(pod) { - return true - } - - if pod.Spec.Priority != nil && *pod.Spec.Priority >= SystemCriticalPriority { - return true - } - - return false -} - // PodRequestsAndLimits returns a dictionary of all defined resources summed up for all // containers of the pod. If PodOverhead feature is enabled, pod overhead is added to the // total container resource requests and to the total container limits which have a diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index b0f070b69..89bd0adee 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -48,7 +48,7 @@ import ( "sigs.k8s.io/descheduler/pkg/descheduler/strategies" ) -func MakePodSpec(priorityClassName string) v1.PodSpec { +func MakePodSpec(priorityClassName string, gracePeriod *int64) v1.PodSpec { return v1.PodSpec{ Containers: []v1.Container{{ Name: "pause", @@ -66,7 +66,8 @@ func MakePodSpec(priorityClassName string) v1.PodSpec { }, }, }}, - PriorityClassName: priorityClassName, + PriorityClassName: priorityClassName, + TerminationGracePeriodSeconds: gracePeriod, } } @@ -97,7 +98,7 @@ func RcByNameContainer(name, namespace string, replicas int32, labels map[string ObjectMeta: metav1.ObjectMeta{ Labels: labels, }, - Spec: MakePodSpec(priorityClassName), + Spec: MakePodSpec(priorityClassName, gracePeriod), }, }, } @@ -155,38 +156,6 @@ func initializeClient(t *testing.T) (clientset.Interface, coreinformers.NodeInfo return clientSet, nodeInformer, stopChannel } -func TestLowNodeUtilization(t *testing.T) { - ctx := context.Background() - - clientSet, nodeInformer, stopCh := initializeClient(t) - defer close(stopCh) - - nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - if err != nil { - t.Errorf("Error listing node with %v", err) - } - - var nodes []*v1.Node - for i := range nodeList.Items { - node := nodeList.Items[i] - nodes = append(nodes, &node) - } - - testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} - if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { - t.Fatalf("Unable to create ns %v", testNamespace.Name) - } - defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) - - rc := RcByNameContainer("test-rc-node-utilization", testNamespace.Name, int32(15), map[string]string{"test": "node-utilization"}, nil, "") - if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil { - t.Errorf("Error creating deployment %v", err) - } - - evictPods(ctx, t, clientSet, nodeInformer, nodes, rc) - deleteRC(ctx, t, clientSet, rc) 
-} - func runPodLifetimeStrategy( ctx context.Context, clientset clientset.Interface, @@ -194,6 +163,7 @@ func runPodLifetimeStrategy( namespaces *deschedulerapi.Namespaces, priorityClass string, priority *int32, + evictCritical bool, labelSelector *metav1.LabelSelector, ) { // Run descheduler. @@ -229,6 +199,7 @@ func runPodLifetimeStrategy( 0, nodes, false, + evictCritical, false, ), ) @@ -257,6 +228,38 @@ func intersectStrings(lista, listb []string) []string { return commonNames } +func TestLowNodeUtilization(t *testing.T) { + ctx := context.Background() + + clientSet, nodeInformer, stopCh := initializeClient(t) + defer close(stopCh) + + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + t.Errorf("Error listing node with %v", err) + } + + var nodes []*v1.Node + for i := range nodeList.Items { + node := nodeList.Items[i] + nodes = append(nodes, &node) + } + + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + + rc := RcByNameContainer("test-rc-node-utilization", testNamespace.Name, int32(15), map[string]string{"test": "node-utilization"}, nil, "") + if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil { + t.Errorf("Error creating deployment %v", err) + } + + evictPods(ctx, t, clientSet, nodeInformer, nodes, rc) + deleteRC(ctx, t, clientSet, rc) +} + // TODO(jchaloup): add testcases for two included/excluded namespaces func TestNamespaceConstraintsInclude(t *testing.T) { @@ -297,7 +300,7 @@ func TestNamespaceConstraintsInclude(t *testing.T) { t.Logf("set the strategy to delete pods from %v namespace", rc.Namespace) runPodLifetimeStrategy(ctx, clientSet, nodeInformer, &deschedulerapi.Namespaces{ Include: []string{rc.Namespace}, - }, "", nil, nil) + }, "", nil, false, nil) // All pods are supposed to be deleted, wait until all the old pods are deleted if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) { @@ -368,7 +371,7 @@ func TestNamespaceConstraintsExclude(t *testing.T) { t.Logf("set the strategy to delete pods from namespaces except the %v namespace", rc.Namespace) runPodLifetimeStrategy(ctx, clientSet, nodeInformer, &deschedulerapi.Namespaces{ Exclude: []string{rc.Namespace}, - }, "", nil, nil) + }, "", nil, false, nil) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -387,6 +390,136 @@ func TestNamespaceConstraintsExclude(t *testing.T) { } } +func TestEvictSystemCriticalPriority(t *testing.T) { + testEvictSystemCritical(t, false) +} + +func TestEvictSystemCriticalPriorityClass(t *testing.T) { + testEvictSystemCritical(t, true) +} + +func testEvictSystemCritical(t *testing.T, isPriorityClass bool) { + var highPriority = int32(1000) + var lowPriority = int32(500) + ctx := context.Background() + + clientSet, nodeInformer, stopCh := initializeClient(t) + defer close(stopCh) + + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, 
metav1.DeleteOptions{}) + + // create two priority classes + highPriorityClass := &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name()) + "-highpriority"}, + Value: highPriority, + } + if _, err := clientSet.SchedulingV1().PriorityClasses().Create(ctx, highPriorityClass, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating priorityclass %s: %v", highPriorityClass.Name, err) + } + defer clientSet.SchedulingV1().PriorityClasses().Delete(ctx, highPriorityClass.Name, metav1.DeleteOptions{}) + + lowPriorityClass := &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name()) + "-lowpriority"}, + Value: lowPriority, + } + if _, err := clientSet.SchedulingV1().PriorityClasses().Create(ctx, lowPriorityClass, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating priorityclass %s: %v", lowPriorityClass.Name, err) + } + defer clientSet.SchedulingV1().PriorityClasses().Delete(ctx, lowPriorityClass.Name, metav1.DeleteOptions{}) + + // Create a replication controller with the "system-node-critical" priority class (this gives the pods a priority of 2000001000) + rcCriticalPriority := RcByNameContainer("test-rc-podlifetime-criticalpriority", testNamespace.Name, 3, + map[string]string{"test": "podlifetime-criticalpriority"}, nil, "system-node-critical") + if _, err := clientSet.CoreV1().ReplicationControllers(rcCriticalPriority.Namespace).Create(ctx, rcCriticalPriority, metav1.CreateOptions{}); err != nil { + t.Errorf("Error creating rc %s: %v", rcCriticalPriority.Name, err) + } + defer deleteRC(ctx, t, clientSet, rcCriticalPriority) + + // create two RCs with different priority classes in the same namespace + rcHighPriority := RcByNameContainer("test-rc-podlifetime-highpriority", testNamespace.Name, 3, + map[string]string{"test": "podlifetime-highpriority"}, nil, highPriorityClass.Name) + if _, err := clientSet.CoreV1().ReplicationControllers(rcHighPriority.Namespace).Create(ctx, rcHighPriority, metav1.CreateOptions{}); err != nil { + t.Errorf("Error creating rc %s: %v", rcHighPriority.Name, err) + } + defer deleteRC(ctx, t, clientSet, rcHighPriority) + + rcLowPriority := RcByNameContainer("test-rc-podlifetime-lowpriority", testNamespace.Name, 3, + map[string]string{"test": "podlifetime-lowpriority"}, nil, lowPriorityClass.Name) + if _, err := clientSet.CoreV1().ReplicationControllers(rcLowPriority.Namespace).Create(ctx, rcLowPriority, metav1.CreateOptions{}); err != nil { + t.Errorf("Error creating rc %s: %v", rcLowPriority.Name, err) + } + defer deleteRC(ctx, t, clientSet, rcLowPriority) + + // wait for a while so all the pods are at least few seconds older + time.Sleep(5 * time.Second) + + // it's assumed all new pods are named differently from currently running -> no name collision + podListCriticalPriority, err := clientSet.CoreV1().Pods(rcCriticalPriority.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcCriticalPriority.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + + podListHighPriority, err := clientSet.CoreV1().Pods(rcHighPriority.Namespace).List( + ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcHighPriority.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + podListLowPriority, err := clientSet.CoreV1().Pods(rcLowPriority.Namespace).List( + ctx, metav1.ListOptions{LabelSelector: 
labels.SelectorFromSet(rcLowPriority.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + + if (len(podListHighPriority.Items) + len(podListLowPriority.Items) + len(podListCriticalPriority.Items)) != 9 { + t.Fatalf("Expected 9 replicas, got %v instead", len(podListHighPriority.Items)+len(podListLowPriority.Items)+len(podListCriticalPriority.Items)) + } + + initialPodNames := append(getPodNames(podListHighPriority.Items), getPodNames(podListLowPriority.Items)...) + initialPodNames = append(initialPodNames, getPodNames(podListCriticalPriority.Items)...) + sort.Strings(initialPodNames) + t.Logf("Existing pods: %v", initialPodNames) + + if isPriorityClass { + runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, true, nil) + } else { + runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, "", &highPriority, true, nil) + } + + // All pods are supposed to be deleted, wait until all pods in the test namespace are terminating + t.Logf("All pods in the test namespace, no matter their priority (including system-node-critical and system-cluster-critical), will be deleted") + if err := wait.PollImmediate(time.Second, 30*time.Second, func() (bool, error) { + podList, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{}) + if err != nil { + return false, nil + } + currentPodNames := getPodNames(podList.Items) + // validate all pod were deleted + if len(intersectStrings(initialPodNames, currentPodNames)) > 0 { + t.Logf("Waiting until %v pods get deleted", intersectStrings(initialPodNames, currentPodNames)) + // check if there's at least one pod not in Terminating state + for _, pod := range podList.Items { + // In case podList contains newly created pods + if len(intersectStrings(initialPodNames, []string{pod.Name})) == 0 { + continue + } + if pod.DeletionTimestamp == nil { + t.Logf("Pod %v not in terminating state", pod.Name) + return false, nil + } + } + t.Logf("All %v pods are terminating", intersectStrings(initialPodNames, currentPodNames)) + } + + return true, nil + }); err != nil { + t.Fatalf("Error waiting for pods to be deleted: %v", err) + } +} + func TestThresholdPriority(t *testing.T) { testPriority(t, false) } @@ -466,14 +599,14 @@ func testPriority(t *testing.T, isPriorityClass bool) { expectEvictPodNames := getPodNames(podListLowPriority.Items) sort.Strings(expectReservePodNames) sort.Strings(expectEvictPodNames) - t.Logf("Pods not expect to be evicted: %v, pods expect to be evicted: %v", expectReservePodNames, expectEvictPodNames) + t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames) if isPriorityClass { t.Logf("set the strategy to delete pods with priority lower than priority class %s", highPriorityClass.Name) - runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, nil) + runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, false, nil) } else { t.Logf("set the strategy to delete pods with priority lower than %d", highPriority) - runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, "", &highPriority, nil) + runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, "", &highPriority, false, nil) } t.Logf("Waiting 10s") @@ -495,7 +628,7 @@ func testPriority(t *testing.T, isPriorityClass bool) { } //check if all pods with low priority class are evicted - if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) { + if 
err := wait.PollImmediate(time.Second, 30*time.Second, func() (bool, error) { podListLowPriority, err := clientSet.CoreV1().Pods(rcLowPriority.Namespace).List( ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcLowPriority.Spec.Template.Labels).String()}) if err != nil { @@ -574,10 +707,10 @@ func TestPodLabelSelector(t *testing.T) { expectEvictPodNames := getPodNames(podListEvict.Items) sort.Strings(expectReservePodNames) sort.Strings(expectEvictPodNames) - t.Logf("Pods not expect to be evicted: %v, pods expect to be evicted: %v", expectReservePodNames, expectEvictPodNames) + t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames) t.Logf("set the strategy to delete pods with label test:podlifetime-evict") - runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, "", nil, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}) + runPodLifetimeStrategy(ctx, clientSet, nodeInformer, nil, "", nil, false, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -598,7 +731,7 @@ func TestPodLabelSelector(t *testing.T) { } //check if all selected pods are evicted - if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) { + if err := wait.PollImmediate(time.Second, 30*time.Second, func() (bool, error) { podListEvict, err := clientSet.CoreV1().Pods(rcEvict.Namespace).List( ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcEvict.Spec.Template.Labels).String()}) if err != nil { @@ -732,6 +865,17 @@ func deleteRC(ctx context.Context, t *testing.T, clientSet clientset.Interface, t.Errorf("Deleting of rc pods took too long") } + if err := wait.PollImmediate(5*time.Second, time.Minute, func() (bool, error) { + podList, _ := clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) + t.Logf("Waiting for %v RC pods to disappear, still %v remaining", rc.Name, len(podList.Items)) + if len(podList.Items) > 0 { + return false, nil + } + return true, nil + }); err != nil { + t.Fatalf("Error waiting for rc pods to disappear: %v", err) + } + if err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Delete(ctx, rc.Name, metav1.DeleteOptions{}); err != nil { t.Fatalf("Error deleting rc %v", err) } @@ -762,6 +906,7 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface, nodeList, true, false, + false, ) for _, node := range nodeList { // Skip the Master Node diff --git a/test/run-e2e-tests.sh b/test/run-e2e-tests.sh index 09c6cb8cf..441fedc55 100755 --- a/test/run-e2e-tests.sh +++ b/test/run-e2e-tests.sh @@ -14,7 +14,7 @@ #!/bin/bash -# This just run e2e tests. +# This just runs e2e tests. if [ -n "$KIND_E2E" ]; then K8S_VERSION=${KUBERNETES_VERSION:-v1.18.2} curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl && chmod +x kubectl && sudo mv kubectl /usr/local/bin/
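
A minimal standalone sketch of the new gating, assuming only the public Kubernetes API types (the helper names priorityConstraintsAllowEviction and isCriticalPriorityPod below are illustrative, not part of the descheduler package): when evictSystemCriticalPods is false, the evictor keeps rejecting system-critical pods and pods at or above the optional threshold; when it is true, both checks are skipped, which is why the README notes that the setting disables priority filtering entirely.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// SystemCriticalPriority mirrors the priority value Kubernetes assigns to the
// system-cluster-critical priority class.
const SystemCriticalPriority = 2 * int32(1000000000)

// isCriticalPriorityPod mirrors utils.IsCriticalPriorityPod from the patch:
// a pod counts as system critical when its resolved priority is at or above
// SystemCriticalPriority.
func isCriticalPriorityPod(pod *v1.Pod) bool {
	return pod.Spec.Priority != nil && *pod.Spec.Priority >= SystemCriticalPriority
}

// priorityConstraintsAllowEviction reproduces the constraint wiring added to
// PodEvictor.Evictable: with evictSystemCriticalPods=false both the critical
// check and the optional threshold check apply; with it set to true neither does.
func priorityConstraintsAllowEviction(pod *v1.Pod, evictSystemCriticalPods bool, threshold *int32) error {
	if evictSystemCriticalPods {
		return nil
	}
	if isCriticalPriorityPod(pod) {
		return fmt.Errorf("pod has system critical priority")
	}
	if threshold != nil && pod.Spec.Priority != nil && *pod.Spec.Priority >= *threshold {
		return fmt.Errorf("pod has higher priority than specified priority class threshold")
	}
	return nil
}

func main() {
	critical := SystemCriticalPriority
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "kube-dns-abc"},
		Spec:       v1.PodSpec{Priority: &critical},
	}

	// Default behaviour: the system-critical pod is protected.
	fmt.Println(priorityConstraintsAllowEviction(pod, false, nil))
	// With evictSystemCriticalPods enabled the same pod becomes evictable.
	fmt.Println(priorityConstraintsAllowEviction(pod, true, nil))
}

This matches the p16/p17 cases added to TestIsEvictable: the same system-critical or above-threshold pod flips from non-evictable to evictable once evictSystemCriticalPods is set.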