From dc7f9efc19fd1acb67e14464c2f1d0830dfc3bcb Mon Sep 17 00:00:00 2001
From: ksimon1
Date: Thu, 3 Oct 2019 16:41:07 +0200
Subject: [PATCH 1/2] Descheduler can evict all types of pods with special annotation

When a pod has this annotation, the descheduler will always try to evict it.
This lets users choose which pods are evicted: only pods carrying the
annotation are treated this way.

Signed-off-by: ksimon1
---
 README.md                   |  3 +++
 pkg/descheduler/pod/pods.go | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2479a8310..6af68990b 100644
--- a/README.md
+++ b/README.md
@@ -259,6 +259,9 @@ never evicted because these pods won't be recreated.
 * Pods associated with DaemonSets are never evicted.
 * Pods with local storage are never evicted.
 * Best efforts pods are evicted before Burstable and Guaranteed pods.
+* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This
+annotation overrides the checks that normally prevent eviction, so users can select which pods are evicted.
+Users should know whether and how such a pod will be recreated.
 
 ### Pod disruption Budget (PDB)
 Pods subject to Pod Disruption Budget (PDB) are not evicted if descheduling violates its pod
diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go
index 26b85f400..cf653d460 100644
--- a/pkg/descheduler/pod/pods.go
+++ b/pkg/descheduler/pod/pods.go
@@ -17,7 +17,7 @@ limitations under the License.
 package pod
 
 import (
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	clientset "k8s.io/client-go/kubernetes"
@@ -26,6 +26,10 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/types"
 )
 
+const (
+	evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
+)
+
 // checkLatencySensitiveResourcesForAContainer checks if there are any latency sensitive resources like GPUs.
 func checkLatencySensitiveResourcesForAContainer(rl v1.ResourceList) bool {
 	if rl == nil {
@@ -54,7 +58,7 @@ func IsLatencySensitivePod(pod *v1.Pod) bool {
 // IsEvictable checks if a pod is evictable or not.
 func IsEvictable(pod *v1.Pod, evictLocalStoragePods bool) bool {
 	ownerRefList := OwnerRef(pod)
-	if IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod) {
+	if !HaveEvictAnnotation(pod) && (IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod)) {
 		return false
 	}
 	return true
@@ -127,6 +131,12 @@ func IsMirrorPod(pod *v1.Pod) bool {
 	return found
 }
 
+// HaveEvictAnnotation checks if the pod has the evict annotation
+func HaveEvictAnnotation(pod *v1.Pod) bool {
+	_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
+	return found
+}
+
 func IsPodWithLocalStorage(pod *v1.Pod) bool {
 	for _, volume := range pod.Spec.Volumes {
 		if volume.HostPath != nil || volume.EmptyDir != nil {

From fb1b5fc690aacd4254a08c75099cf21bc5271ac2 Mon Sep 17 00:00:00 2001
From: ksimon1
Date: Thu, 3 Oct 2019 16:51:03 +0200
Subject: [PATCH 2/2] added unit test, updated e2e test

Signed-off-by: ksimon1
---
 pkg/descheduler/pod/pods_test.go | 151 ++++++++++++++++++++++++++++++-
 test/e2e/e2e_test.go             |  57 +++++++++++-
 2 files changed, 204 insertions(+), 4 deletions(-)

diff --git a/pkg/descheduler/pod/pods_test.go b/pkg/descheduler/pod/pods_test.go
index 2c87e3874..2800d6c2b 100644
--- a/pkg/descheduler/pod/pods_test.go
+++ b/pkg/descheduler/pod/pods_test.go
@@ -19,11 +19,160 @@ package pod
 import (
 	"testing"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"sigs.k8s.io/descheduler/test"
 )
 
+func TestIsEvictable(t *testing.T) {
+	n1 := test.BuildTestNode("node1", 1000, 2000, 13)
+	type testCase struct {
+		pod                   *v1.Pod
+		runBefore             func(*v1.Pod)
+		evictLocalStoragePods bool
+		result                bool
+	}
+
+	testCases := []testCase{
+		{
+			pod: test.BuildTestPod("p1", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p2", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
+				pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p3", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p4", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
+				pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p5", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
+				pod.Spec.Volumes = []v1.Volume{
+					{
+						Name: "sample",
+						VolumeSource: v1.VolumeSource{
+							HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+							EmptyDir: &v1.EmptyDirVolumeSource{
+								SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+						},
+					},
+				}
+			},
+			evictLocalStoragePods: false,
+			result:                false,
+		}, {
+			pod: test.BuildTestPod("p6", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
+				pod.Spec.Volumes = []v1.Volume{
+					{
+						Name: "sample",
+						VolumeSource: v1.VolumeSource{
+							HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+							EmptyDir: &v1.EmptyDirVolumeSource{
+								SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+						},
+					},
+				}
+			},
+			evictLocalStoragePods: true,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p7", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
+				pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
+				pod.Spec.Volumes = []v1.Volume{
+					{
+						Name: "sample",
+						VolumeSource: v1.VolumeSource{
+							HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+							EmptyDir: &v1.EmptyDirVolumeSource{
+								SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+						},
+					},
+				}
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p8", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                false,
+		}, {
+			pod: test.BuildTestPod("p9", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
+				pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p10", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = test.GetMirrorPodAnnotation()
+			},
+			evictLocalStoragePods: false,
+			result:                false,
+		}, {
+			pod: test.BuildTestPod("p11", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = test.GetMirrorPodAnnotation()
+				pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		}, {
+			pod: test.BuildTestPod("p12", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = test.GetCriticalPodAnnotation()
+			},
+			evictLocalStoragePods: false,
+			result:                false,
+		}, {
+			pod: test.BuildTestPod("p13", 400, 0, n1.Name),
+			runBefore: func(pod *v1.Pod) {
+				pod.Annotations = test.GetCriticalPodAnnotation()
+				pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
+			},
+			evictLocalStoragePods: false,
+			result:                true,
+		},
+	}
+
+	for _, test := range testCases {
+		test.runBefore(test.pod)
+		result := IsEvictable(test.pod, test.evictLocalStoragePods)
+		if result != test.result {
+			t.Errorf("IsEvictable(%s) should return %t, got %t", test.pod.Name, test.result, result)
+		}
+
+	}
+}
 func TestPodTypes(t *testing.T) {
 	n1 := test.BuildTestNode("node1", 1000, 2000, 9)
 	p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index e190affcd..6f899a3cc 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -17,12 +17,13 @@ limitations under the License.
 package e2e
 
 import (
-	"github.com/golang/glog"
 	"math"
 	"testing"
 	"time"
 
-	"k8s.io/api/core/v1"
+	"github.com/golang/glog"
+
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	clientset "k8s.io/client-go/kubernetes"
@@ -122,7 +123,6 @@ func startEndToEndForLowNodeUtilization(clientset clientset.Interface) {
 func TestE2E(t *testing.T) {
 	// If we have reached here, it means cluster would have been already setup and the kubeconfig file should
 	// be in /tmp directory as admin.conf.
-	var leastLoadedNode v1.Node
 	clientSet, err := client.CreateClient("/tmp/admin.conf")
 	if err != nil {
 		t.Errorf("Error during client creation with %v", err)
@@ -138,6 +138,28 @@
 	if err != nil {
 		t.Errorf("Error creating deployment %v", err)
 	}
+	evictPods(t, clientSet, nodeList, rc)
+
+	rc.Spec.Template.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
+	rc.Spec.Replicas = func(i int32) *int32 { return &i }(15)
+	rc.Spec.Template.Spec.Volumes = []v1.Volume{
+		{
+			Name: "sample",
+			VolumeSource: v1.VolumeSource{
+				EmptyDir: &v1.EmptyDirVolumeSource{
+					SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
+			},
+		},
+	}
+	_, err = clientSet.CoreV1().ReplicationControllers("default").Create(rc)
+	if err != nil {
+		t.Errorf("Error creating replication controller %v", err)
+	}
+	evictPods(t, clientSet, nodeList, rc)
+}
+
+func evictPods(t *testing.T, clientSet clientset.Interface, nodeList *v1.NodeList, rc *v1.ReplicationController) {
+	var leastLoadedNode v1.Node
 	podsBefore := math.MaxInt16
 	for i := range nodeList.Items {
 		// Skip the Master Node
@@ -165,4 +187,33 @@
 	if podsBefore > podsAfter {
 		t.Fatalf("We should have see more pods on this node as per kubeadm's way of installing %v, %v", podsBefore, podsAfter)
 	}
+
+	// set the number of replicas to 0 so the rc's pods get deleted
+	rc.Spec.Replicas = func(i int32) *int32 { return &i }(0)
+	_, err = clientSet.CoreV1().ReplicationControllers("default").Update(rc)
+	if err != nil {
+		t.Errorf("Error updating replication controller %v", err)
+	}
+	allPodsDeleted := false
+	// wait up to 30 seconds until all of the rc's pods are deleted
+	for i := 0; i < 6; i++ {
+		scale, _ := clientSet.CoreV1().ReplicationControllers("default").GetScale(rc.Name, metav1.GetOptions{})
+		if scale.Status.Replicas == 0 {
+			allPodsDeleted = true
+			break
+		}
+		time.Sleep(5 * time.Second)
+	}
+
+	if !allPodsDeleted {
+		t.Errorf("Deleting rc pods took too long")
+	}
+
+	err = clientSet.CoreV1().ReplicationControllers("default").Delete(rc.Name, &metav1.DeleteOptions{})
+	if err != nil {
+		t.Errorf("Error deleting rc %v", err)
+	}
+
+	// wait until the rc is deleted
+	time.Sleep(5 * time.Second)
 }
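
Not part of the patch itself: a minimal sketch of how the new annotation is expected to behave against IsEvictable, assuming the package path introduced above (sigs.k8s.io/descheduler/pkg/descheduler/pod); the example package, test name, and pod name are made up for illustration.

package example

import (
	"testing"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// TestEvictAnnotationOverridesChecks shows that the annotation key alone flips
// the result of IsEvictable for a pod that would otherwise be skipped.
func TestEvictAnnotationOverridesChecks(t *testing.T) {
	// A bare pod with no owner references is normally skipped, because the
	// descheduler assumes such a pod would not be recreated after eviction.
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "standalone",
			Namespace: "default",
		},
	}
	if podutil.IsEvictable(pod, false) {
		t.Errorf("pod without owner references should not be evictable by default")
	}

	// With the annotation present, the other checks are bypassed; only the
	// presence of the key matters, the value ("true" here) is never inspected.
	pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
	if !podutil.IsEvictable(pod, false) {
		t.Errorf("pod with the evict annotation should be evictable")
	}
}

The second argument is the evictLocalStoragePods flag; it is irrelevant here because the pod has no local storage, and the annotation would override that check as well.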