diff --git a/README.md b/README.md index 2479a8310..6af68990b 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,9 @@ never evicted because these pods won't be recreated. * Pods associated with DaemonSets are never evicted. * Pods with local storage are never evicted. * Best efforts pods are evicted before Burstable and Guaranteed pods. +* All types of pods with annotation descheduler.alpha.kubernetes.io/evict are evicted. This +annotation is used to override checks which prevent eviction, and the user can select which pod is evicted. +The user should know how and whether the pod will be recreated. ### Pod disruption Budget (PDB) Pods subject to Pod Disruption Budget (PDB) are not evicted if descheduling violates its pod diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go index 26b85f400..cf653d460 100644 --- a/pkg/descheduler/pod/pods.go +++ b/pkg/descheduler/pod/pods.go @@ -17,7 +17,7 @@ limitations under the License. package pod import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" clientset "k8s.io/client-go/kubernetes" @@ -26,6 +26,10 @@ import ( "k8s.io/kubernetes/pkg/kubelet/types" ) +const ( + evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict" +) + // checkLatencySensitiveResourcesForAContainer checks if there are any latency sensitive resources like GPUs. func checkLatencySensitiveResourcesForAContainer(rl v1.ResourceList) bool { if rl == nil { @@ -54,7 +58,7 @@ func IsLatencySensitivePod(pod *v1.Pod) bool { // IsEvictable checks if a pod is evictable or not. 
func IsEvictable(pod *v1.Pod, evictLocalStoragePods bool) bool { ownerRefList := OwnerRef(pod) - if IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod) { + if !HaveEvictAnnotation(pod) && (IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod)) { return false } return true @@ -127,6 +131,12 @@ func IsMirrorPod(pod *v1.Pod) bool { return found } +// HaveEvictAnnotation checks if the pod has the evict annotation +func HaveEvictAnnotation(pod *v1.Pod) bool { + _, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey] + return found +} + func IsPodWithLocalStorage(pod *v1.Pod) bool { for _, volume := range pod.Spec.Volumes { if volume.HostPath != nil || volume.EmptyDir != nil { diff --git a/pkg/descheduler/pod/pods_test.go b/pkg/descheduler/pod/pods_test.go index 2c87e3874..2800d6c2b 100644 --- a/pkg/descheduler/pod/pods_test.go +++ b/pkg/descheduler/pod/pods_test.go @@ -19,11 +19,160 @@ package pod import ( "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/descheduler/test" ) +func TestIsEvictable(t *testing.T) { + n1 := test.BuildTestNode("node1", 1000, 2000, 13) + type testCase struct { + pod *v1.Pod + runBefore func(*v1.Pod) + evictLocalStoragePods bool + result bool + } + + testCases := []testCase{ + { + pod: test.BuildTestPod("p1", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p2", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: 
test.BuildTestPod("p3", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p4", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p5", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p6", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: true, + result: true, + }, { + pod: test.BuildTestPod("p7", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: false, + result: true, + 
}, { + pod: test.BuildTestPod("p8", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p9", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p10", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetMirrorPodAnnotation() + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p11", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetMirrorPodAnnotation() + pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p12", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetCriticalPodAnnotation() + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p13", 400, 0, n1.Name), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetCriticalPodAnnotation() + pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" + }, + evictLocalStoragePods: false, + result: true, + }, + } + + for _, test := range testCases { + test.runBefore(test.pod) + result := IsEvictable(test.pod, test.evictLocalStoragePods) + if result != test.result { + t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result) + } + + } +} func TestPodTypes(t *testing.T) { n1 := test.BuildTestNode("node1", 1000, 2000, 9) p1 := test.BuildTestPod("p1", 400, 0, n1.Name) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index e190affcd..6f899a3cc 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -17,12 +17,13 @@ 
limitations under the License. package e2e import ( - "github.com/golang/glog" "math" "testing" "time" - "k8s.io/api/core/v1" + "github.com/golang/glog" + + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" @@ -122,7 +123,6 @@ func startEndToEndForLowNodeUtilization(clientset clientset.Interface) { func TestE2E(t *testing.T) { // If we have reached here, it means cluster would have been already setup and the kubeconfig file should // be in /tmp directory as admin.conf. - var leastLoadedNode v1.Node clientSet, err := client.CreateClient("/tmp/admin.conf") if err != nil { t.Errorf("Error during client creation with %v", err) @@ -138,6 +138,28 @@ func TestE2E(t *testing.T) { if err != nil { t.Errorf("Error creating deployment %v", err) } + evictPods(t, clientSet, nodeList, rc) + + rc.Spec.Template.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + rc.Spec.Replicas = func(i int32) *int32 { return &i }(15) + rc.Spec.Template.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + _, err = clientSet.CoreV1().ReplicationControllers("default").Create(rc) + if err != nil { + t.Errorf("Error creating deployment %v", err) + } + evictPods(t, clientSet, nodeList, rc) +} + +func evictPods(t *testing.T, clientSet clientset.Interface, nodeList *v1.NodeList, rc *v1.ReplicationController) { + var leastLoadedNode v1.Node podsBefore := math.MaxInt16 for i := range nodeList.Items { // Skip the Master Node @@ -165,4 +187,33 @@ func TestE2E(t *testing.T) { if podsBefore > podsAfter { t.Fatalf("We should have see more pods on this node as per kubeadm's way of installing %v, %v", podsBefore, podsAfter) } + + //set number of replicas to 0 + rc.Spec.Replicas = func(i int32) *int32 { return &i }(0) + _, err = 
clientSet.CoreV1().ReplicationControllers("default").Update(rc) + if err != nil { + t.Errorf("Error updating replica controller %v", err) + } + allPodsDeleted := false + //wait 30 seconds until all pods are deleted + for i := 0; i < 6; i++ { + scale, _ := clientSet.CoreV1().ReplicationControllers("default").GetScale(rc.Name, metav1.GetOptions{}) + if scale.Spec.Replicas == 0 { + allPodsDeleted = true + break + } + time.Sleep(5 * time.Second) + } + + if !allPodsDeleted { + t.Errorf("Deleting of rc pods took too long") + } + + err = clientSet.CoreV1().ReplicationControllers("default").Delete(rc.Name, &metav1.DeleteOptions{}) + if err != nil { + t.Errorf("Error deleting rc %v", err) + } + + //wait until rc is deleted + time.Sleep(5 * time.Second) }