diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index e6c83f862..66059c547 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -60,7 +60,7 @@ func Run(rs *options.DeschedulerServer) error { return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion, stopChannel) } -type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) +type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error { sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0) @@ -99,11 +99,12 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer rs.DryRun, rs.MaxNoOfPodsToEvictPerNode, nodes, + rs.EvictLocalStoragePods, ) for name, f := range strategyFuncs { if strategy := deschedulerPolicy.Strategies[api.StrategyName(name)]; strategy.Enabled { - f(ctx, rs.Client, strategy, nodes, rs.EvictLocalStoragePods, podEvictor) + f(ctx, rs.Client, strategy, nodes, podEvictor) } } diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index e387590db..1eaff53af 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -24,24 +24,32 @@ import ( policy "k8s.io/api/policy/v1beta1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/errors" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/tools/record" "k8s.io/klog" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/utils" eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" ) +const ( + evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict" +) + // nodePodEvictedCount keeps count of pods evicted on node type nodePodEvictedCount map[*v1.Node]int type PodEvictor struct { - client clientset.Interface - policyGroupVersion string - dryRun bool - maxPodsToEvict int - nodepodCount nodePodEvictedCount + client clientset.Interface + policyGroupVersion string + dryRun bool + maxPodsToEvict int + nodepodCount nodePodEvictedCount + evictLocalStoragePods bool } func NewPodEvictor( @@ -50,6 +58,7 @@ func NewPodEvictor( dryRun bool, maxPodsToEvict int, nodes []*v1.Node, + evictLocalStoragePods bool, ) *PodEvictor { var nodePodCount = make(nodePodEvictedCount) for _, node := range nodes { @@ -58,14 +67,46 @@ func NewPodEvictor( } return &PodEvictor{ - client: client, - policyGroupVersion: policyGroupVersion, - dryRun: dryRun, - maxPodsToEvict: maxPodsToEvict, - nodepodCount: nodePodCount, + client: client, + policyGroupVersion: policyGroupVersion, + dryRun: dryRun, + maxPodsToEvict: maxPodsToEvict, + nodepodCount: nodePodCount, + evictLocalStoragePods: evictLocalStoragePods, } } +// IsEvictable checks if a pod is evictable or not. 
+func (pe *PodEvictor) IsEvictable(pod *v1.Pod) bool { + checkErrs := []error{} + if IsCriticalPod(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is critical")) + } + + ownerRefList := podutil.OwnerRef(pod) + if IsDaemonsetPod(ownerRefList) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod")) + } + + if len(ownerRefList) == 0 { + checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs")) + } + + if !pe.evictLocalStoragePods && IsPodWithLocalStorage(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods")) + } + + if IsMirrorPod(pod) { + checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod")) + } + + if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) { + klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error()) + return false + } + return true +} + // NodeEvicted gives a number of pods evicted for node func (pe *PodEvictor) NodeEvicted(node *v1.Node) int { return pe.nodepodCount[node] @@ -139,3 +180,37 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli } return err } + +func IsCriticalPod(pod *v1.Pod) bool { + return utils.IsCriticalPod(pod) +} + +func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { + for _, ownerRef := range ownerRefList { + if ownerRef.Kind == "DaemonSet" { + return true + } + } + return false +} + +// IsMirrorPod checks whether the pod is a mirror pod. +func IsMirrorPod(pod *v1.Pod) bool { + return utils.IsMirrorPod(pod) +} + +// HaveEvictAnnotation checks if the pod have evict annotation +func HaveEvictAnnotation(pod *v1.Pod) bool { + _, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey] + return found +} + +func IsPodWithLocalStorage(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + if volume.HostPath != nil || volume.EmptyDir != nil { + return true + } + } + + return false +} diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index 0d6bf1bb8..52c9d3f5f 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -21,9 +21,12 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" + "sigs.k8s.io/descheduler/pkg/utils" "sigs.k8s.io/descheduler/test" ) @@ -65,3 +68,217 @@ func TestEvictPod(t *testing.T) { } } } + +func TestIsEvictable(t *testing.T) { + n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil) + type testCase struct { + pod *v1.Pod + runBefore func(*v1.Pod) + evictLocalStoragePods bool + result bool + } + + testCases := []testCase{ + { + pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + 
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: true, + result: true, + }, { + pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetMirrorPodAnnotation() + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + pod.Annotations = test.GetMirrorPodAnnotation() + pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" + }, + evictLocalStoragePods: false, + result: true, + }, { + pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }, + evictLocalStoragePods: false, + result: false, + }, { + pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil), + runBefore: func(pod *v1.Pod) { + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + pod.Annotations = map[string]string{ + "descheduler.alpha.kubernetes.io/evict": "true", + } + }, + evictLocalStoragePods: false, + result: true, + }, + } + + for _, test := range testCases { + test.runBefore(test.pod) + podEvictor := 
&PodEvictor{ + evictLocalStoragePods: test.evictLocalStoragePods, + } + result := podEvictor.IsEvictable(test.pod) + if result != test.result { + t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result) + } + + } +} +func TestPodTypes(t *testing.T) { + n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil) + p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil) + + // These won't be evicted. + p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil) + p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil) + p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil) + p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil) + p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil) + + p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + + p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + // The following 4 pods won't get evicted. + // A daemonset. + //p2.Annotations = test.GetDaemonSetAnnotation() + p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + // A pod with local storage. + p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + p3.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + // A Mirror Pod. + p4.Annotations = test.GetMirrorPodAnnotation() + // A Critical Pod. + p5.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + p5.Spec.Priority = &priority + systemCriticalPriority := utils.SystemCriticalPriority + p5.Spec.Priority = &systemCriticalPriority + if !IsMirrorPod(p4) { + t.Errorf("Expected p4 to be a mirror pod.") + } + if !IsCriticalPod(p5) { + t.Errorf("Expected p5 to be a critical pod.") + } + if !IsPodWithLocalStorage(p3) { + t.Errorf("Expected p3 to be a pod with local storage.") + } + ownerRefList := podutil.OwnerRef(p2) + if !IsDaemonsetPod(ownerRefList) { + t.Errorf("Expected p2 to be a daemonset pod.") + } + ownerRefList = podutil.OwnerRef(p1) + if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) { + t.Errorf("Expected p1 to be a normal pod.") + } + +} diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go index e31b2d59b..91f0f0fed 100644 --- a/pkg/descheduler/pod/pods.go +++ b/pkg/descheduler/pod/pods.go @@ -18,69 +18,19 @@ package pod import ( "context" - "fmt" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" - "k8s.io/apimachinery/pkg/util/errors" clientset "k8s.io/client-go/kubernetes" - "k8s.io/klog" "sigs.k8s.io/descheduler/pkg/utils" ) -const ( - evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict" -) - -// IsEvictable checks if a pod is evictable or not. 
-func IsEvictable(pod *v1.Pod, evictLocalStoragePods bool) bool { - checkErrs := []error{} - if IsCriticalPod(pod) { - checkErrs = append(checkErrs, fmt.Errorf("pod is critical")) - } - - ownerRefList := OwnerRef(pod) - if IsDaemonsetPod(ownerRefList) { - checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod")) - } - - if len(ownerRefList) == 0 { - checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs")) - } - - if !evictLocalStoragePods && IsPodWithLocalStorage(pod) { - checkErrs = append(checkErrs, fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods")) - } - - if IsMirrorPod(pod) { - checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod")) - } - - if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) { - klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error()) - return false - } - return true -} - -// ListEvictablePodsOnNode returns the list of evictable pods on node. -func ListEvictablePodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, evictLocalStoragePods bool) ([]*v1.Pod, error) { - pods, err := ListPodsOnANode(ctx, client, node) - if err != nil { - return []*v1.Pod{}, err - } - evictablePods := make([]*v1.Pod, 0) - for _, pod := range pods { - if !IsEvictable(pod, evictLocalStoragePods) { - continue - } else { - evictablePods = append(evictablePods, pod) - } - } - return evictablePods, nil -} - -func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node) ([]*v1.Pod, error) { +// ListPodsOnANode lists all of the pods on a node +// It also accepts an optional "filter" function which can be used to further limit the pods that are returned. +// (Usually this is podEvictor.IsEvictable, in order to only list the evictable pods on a node, but can +// be used by strategies to extend IsEvictable if there are further restrictions, such as with NodeAffinity). +// The filter function should return true if the pod should be returned from ListPodsOnANode +func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, filter func(pod *v1.Pod) bool) ([]*v1.Pod, error) { fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) if err != nil { return []*v1.Pod{}, err @@ -94,13 +44,17 @@ func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.N pods := make([]*v1.Pod, 0) for i := range podList.Items { + if filter != nil && !filter(&podList.Items[i]) { + continue + } pods = append(pods, &podList.Items[i]) } return pods, nil } -func IsCriticalPod(pod *v1.Pod) bool { - return utils.IsCriticalPod(pod) +// OwnerRef returns the ownerRefList for the pod. +func OwnerRef(pod *v1.Pod) []metav1.OwnerReference { + return pod.ObjectMeta.GetOwnerReferences() } func IsBestEffortPod(pod *v1.Pod) bool { @@ -114,38 +68,3 @@ func IsBurstablePod(pod *v1.Pod) bool { func IsGuaranteedPod(pod *v1.Pod) bool { return utils.GetPodQOS(pod) == v1.PodQOSGuaranteed } - -func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool { - for _, ownerRef := range ownerRefList { - if ownerRef.Kind == "DaemonSet" { - return true - } - } - return false -} - -// IsMirrorPod checks whether the pod is a mirror pod. 
-func IsMirrorPod(pod *v1.Pod) bool { - return utils.IsMirrorPod(pod) -} - -// HaveEvictAnnotation checks if the pod have evict annotation -func HaveEvictAnnotation(pod *v1.Pod) bool { - _, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey] - return found -} - -func IsPodWithLocalStorage(pod *v1.Pod) bool { - for _, volume := range pod.Spec.Volumes { - if volume.HostPath != nil || volume.EmptyDir != nil { - return true - } - } - - return false -} - -// OwnerRef returns the ownerRefList for the pod. -func OwnerRef(pod *v1.Pod) []metav1.OwnerReference { - return pod.ObjectMeta.GetOwnerReferences() -} diff --git a/pkg/descheduler/pod/pods_test.go b/pkg/descheduler/pod/pods_test.go index 43e5e7afb..aed6361b6 100644 --- a/pkg/descheduler/pod/pods_test.go +++ b/pkg/descheduler/pod/pods_test.go @@ -17,221 +17,53 @@ limitations under the License. package pod import ( + "context" + "fmt" + "strings" "testing" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - "sigs.k8s.io/descheduler/pkg/utils" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + core "k8s.io/client-go/testing" "sigs.k8s.io/descheduler/test" ) -func TestIsEvictable(t *testing.T) { - n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil) - type testCase struct { - pod *v1.Pod - runBefore func(*v1.Pod) - evictLocalStoragePods bool - result bool - } - - testCases := []testCase{ +func TestListPodsOnANode(t *testing.T) { + testCases := []struct { + name string + pods map[string][]v1.Pod + node *v1.Node + expectedPodCount int + }{ { - pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + name: "test listing pods on a node", + pods: map[string][]v1.Pod{ + "n1": { + *test.BuildTestPod("pod1", 100, 0, "n1", nil), + *test.BuildTestPod("pod2", 100, 0, "n1", nil), + }, + "n2": {*test.BuildTestPod("pod3", 100, 0, "n2", nil)}, }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, - }, - }, - } - }, - evictLocalStoragePods: false, - result: false, - }, { - pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - 
HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, - }, - }, - } - }, - evictLocalStoragePods: true, - result: true, - }, { - pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, - }, - }, - } - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() - }, - evictLocalStoragePods: false, - result: false, - }, { - pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = test.GetMirrorPodAnnotation() - }, - evictLocalStoragePods: false, - result: false, - }, { - pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - pod.Annotations = test.GetMirrorPodAnnotation() - pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" - }, - evictLocalStoragePods: false, - result: true, - }, { - pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority - }, - evictLocalStoragePods: false, - result: false, - }, { - pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod) { - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority - pod.Annotations = map[string]string{ - "descheduler.alpha.kubernetes.io/evict": "true", - } - }, - evictLocalStoragePods: false, - result: true, + node: test.BuildTestNode("n1", 2000, 3000, 10, nil), + expectedPodCount: 2, }, } - - for _, test := range testCases { - test.runBefore(test.pod) - result := IsEvictable(test.pod, test.evictLocalStoragePods) - if result != test.result { - t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result) + for _, testCase := range testCases { + fakeClient := &fake.Clientset{} + fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { + list := action.(core.ListAction) + fieldString := list.GetListRestrictions().Fields.String() + if strings.Contains(fieldString, "n1") { + return true, &v1.PodList{Items: testCase.pods["n1"]}, nil + } else if strings.Contains(fieldString, "n2") { + return true, &v1.PodList{Items: testCase.pods["n2"]}, nil + } + return true, nil, fmt.Errorf("Failed to list: %v", list) + }) + pods, _ := ListPodsOnANode(context.TODO(), fakeClient, testCase.node, nil) + if len(pods) != testCase.expectedPodCount { + t.Errorf("expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods)) } - } } -func TestPodTypes(t *testing.T) { - n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil) - p1 := 
test.BuildTestPod("p1", 400, 0, n1.Name, nil) - - // These won't be evicted. - p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil) - p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil) - p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil) - p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil) - p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil) - - p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - - p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() - // The following 4 pods won't get evicted. - // A daemonset. - //p2.Annotations = test.GetDaemonSetAnnotation() - p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() - // A pod with local storage. - p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - p3.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, - }, - }, - } - // A Mirror Pod. - p4.Annotations = test.GetMirrorPodAnnotation() - // A Critical Pod. - p5.Namespace = "kube-system" - priority := utils.SystemCriticalPriority - p5.Spec.Priority = &priority - systemCriticalPriority := utils.SystemCriticalPriority - p5.Spec.Priority = &systemCriticalPriority - if !IsMirrorPod(p4) { - t.Errorf("Expected p4 to be a mirror pod.") - } - if !IsCriticalPod(p5) { - t.Errorf("Expected p5 to be a critical pod.") - } - if !IsPodWithLocalStorage(p3) { - t.Errorf("Expected p3 to be a pod with local storage.") - } - ownerRefList := OwnerRef(p2) - if !IsDaemonsetPod(ownerRefList) { - t.Errorf("Expected p2 to be a daemonset pod.") - } - ownerRefList = OwnerRef(p1) - if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) { - t.Errorf("Expected p1 to be a normal pod.") - } - -} diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index 1d16f96e9..568a6c99a 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -42,12 +42,64 @@ func RemoveDuplicatePods( client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, - evictLocalStoragePods bool, podEvictor *evictions.PodEvictor, ) { for _, node := range nodes { klog.V(1).Infof("Processing node: %#v", node.Name) - duplicatePods := listDuplicatePodsOnANode(ctx, client, node, strategy, evictLocalStoragePods) + pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable) + if err != nil { + klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err) + continue + } + + duplicatePods := make([]*v1.Pod, 0, len(pods)) + // Each pod has a list of owners and a list of containers, and each container has 1 image spec. + // For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string. + // All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness. + // This list of keys representing a single pod is then sorted alphabetically. 
+ // If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons: + // - The 2 pods have the exact same ownerrefs + // - The 2 pods have the exact same container images + // + // duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key + // (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have + // the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed). + // If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate. + // If not, then we add this pod's list to the list of lists for that key. + duplicateKeysMap := map[string][][]string{} + for _, pod := range pods { + ownerRefList := podutil.OwnerRef(pod) + if hasExcludedOwnerRefKind(ownerRefList, strategy) { + continue + } + podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers)) + for _, ownerRef := range ownerRefList { + for _, container := range pod.Spec.Containers { + // Namespace/Kind/Name should be unique for the cluster. + // We also consider the image, as 2 pods could have the same owner but serve different purposes + // So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod. + s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/") + podContainerKeys = append(podContainerKeys, s) + } + } + sort.Strings(podContainerKeys) + + // If there have been any other pods with the same first "key", look through all the lists to see if any match + if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok { + for _, keys := range existing { + if reflect.DeepEqual(keys, podContainerKeys) { + duplicatePods = append(duplicatePods, pod) + break + } + // Found no matches, add this list of keys to the list of lists that have the same first key + duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys) + } + } else { + // This is the first pod we've seen that has this first "key" entry + duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys} + } + } + for _, pod := range duplicatePods { if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { klog.Errorf("Error evicting pod: (%#v)", err) @@ -57,64 +109,6 @@ func RemoveDuplicatePods( } } -// listDuplicatePodsOnANode lists duplicate pods on a given node. -// It checks for pods which have the same owner and have at least 1 container with the same image spec -func listDuplicatePodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, strategy api.DeschedulerStrategy, evictLocalStoragePods bool) []*v1.Pod { - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) - if err != nil { - return nil - } - - duplicatePods := make([]*v1.Pod, 0, len(pods)) - // Each pod has a list of owners and a list of containers, and each container has 1 image spec. - // For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string. - // All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness. - // This list of keys representing a single pod is then sorted alphabetically. 
- // If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons: - // - The 2 pods have the exact same ownerrefs - // - The 2 pods have the exact same container images - // - // duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key - // (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have - // the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed). - // If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate. - // If not, then we add this pod's list to the list of lists for that key. - duplicateKeysMap := map[string][][]string{} - for _, pod := range pods { - ownerRefList := podutil.OwnerRef(pod) - if hasExcludedOwnerRefKind(ownerRefList, strategy) { - continue - } - podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers)) - for _, ownerRef := range ownerRefList { - for _, container := range pod.Spec.Containers { - // Namespace/Kind/Name should be unique for the cluster. - // We also consider the image, as 2 pods could have the same owner but serve different purposes - // So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod. - s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/") - podContainerKeys = append(podContainerKeys, s) - } - } - sort.Strings(podContainerKeys) - - // If there have been any other pods with the same first "key", look through all the lists to see if any match - if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok { - for _, keys := range existing { - if reflect.DeepEqual(keys, podContainerKeys) { - duplicatePods = append(duplicatePods, pod) - break - } - // Found no matches, add this list of keys to the list of lists that have the same first key - duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys) - } - } else { - // This is the first pod we've seen that has this first "key" entry - duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys} - } - } - return duplicatePods -} - func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool { if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil { return false diff --git a/pkg/descheduler/strategies/duplicates_test.go b/pkg/descheduler/strategies/duplicates_test.go index 9a9ac91b1..f2cefa37c 100644 --- a/pkg/descheduler/strategies/duplicates_test.go +++ b/pkg/descheduler/strategies/duplicates_test.go @@ -199,9 +199,10 @@ func TestFindDuplicatePods(t *testing.T) { false, testCase.maxPodsToEvict, []*v1.Node{node}, + false, ) - RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, false, podEvictor) + RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, podEvictor) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != testCase.expectedEvictedPodCount { t.Errorf("Test error for description: %s. 
Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted) diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go index 23594690e..26d609cd9 100644 --- a/pkg/descheduler/strategies/lownodeutilization.go +++ b/pkg/descheduler/strategies/lownodeutilization.go @@ -52,7 +52,7 @@ const ( // LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used // to calculate nodes' utilization and not the actual resource usage. -func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { // todo: move to config validation? // TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params? if strategy.Params == nil || strategy.Params.NodeResourceUtilizationThresholds == nil { @@ -81,7 +81,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg } npm := createNodePodsMap(ctx, client, nodes) - lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds, evictLocalStoragePods) + lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds) klog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v", thresholds[v1.ResourceCPU], thresholds[v1.ResourceMemory], thresholds[v1.ResourcePods]) @@ -116,7 +116,6 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg targetNodes, lowNodes, targetThresholds, - evictLocalStoragePods, podEvictor) klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted()) @@ -166,10 +165,10 @@ func validateThresholds(thresholds api.ResourceThresholds) error { // classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between // low and high thresholds, it is simply ignored. 
-func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds, evictLocalStoragePods bool) ([]NodeUsageMap, []NodeUsageMap) { +func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds) ([]NodeUsageMap, []NodeUsageMap) { lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{} for node, pods := range npm { - usage := nodeUtilization(node, pods, evictLocalStoragePods) + usage := nodeUtilization(node, pods) nuMap := NodeUsageMap{ node: node, usage: usage, @@ -196,7 +195,6 @@ func evictPodsFromTargetNodes( ctx context.Context, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, - evictLocalStoragePods bool, podEvictor *evictions.PodEvictor, ) { @@ -234,7 +232,7 @@ func evictPodsFromTargetNodes( } klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage) - nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, evictLocalStoragePods) + nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, podEvictor) klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bestEffortPods:%v, burstablePods:%v, guaranteedPods:%v", len(node.allPods), len(nonRemovablePods), len(bestEffortPods), len(burstablePods), len(guaranteedPods)) if len(node.allPods) == len(nonRemovablePods) { @@ -366,7 +364,7 @@ func sortPodsBasedOnPriority(evictablePods []*v1.Pod) { func createNodePodsMap(ctx context.Context, client clientset.Interface, nodes []*v1.Node) NodePodsMap { npm := NodePodsMap{} for _, node := range nodes { - pods, err := podutil.ListPodsOnANode(ctx, client, node) + pods, err := podutil.ListPodsOnANode(ctx, client, node, nil) if err != nil { klog.Warningf("node %s will not be processed, error in accessing its pods (%#v)", node.Name, err) } else { @@ -404,7 +402,7 @@ func isNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds return true } -func nodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) api.ResourceThresholds { +func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds { totalReqs := map[v1.ResourceName]*resource.Quantity{ v1.ResourceCPU: {}, v1.ResourceMemory: {}, @@ -433,7 +431,7 @@ func nodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) } } -func classifyPods(pods []*v1.Pod, evictLocalStoragePods bool) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) { +func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) { var nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods []*v1.Pod // From https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/ @@ -447,7 +445,7 @@ func classifyPods(pods []*v1.Pod, evictLocalStoragePods bool) ([]*v1.Pod, []*v1. // For a Pod to be given a QoS class of BestEffort, the Containers in the Pod must not have any memory or CPU limits or requests. 
for _, pod := range pods { - if !podutil.IsEvictable(pod, evictLocalStoragePods) { + if !evictor.IsEvictable(pod) { nonRemovablePods = append(nonRemovablePods, pod) continue } diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go index 5afc0c0a6..bb346ebfc 100644 --- a/pkg/descheduler/strategies/lownodeutilization_test.go +++ b/pkg/descheduler/strategies/lownodeutilization_test.go @@ -470,6 +470,7 @@ func TestLowNodeUtilization(t *testing.T) { false, test.expectedPodsEvicted, nodes, + false, ) strategy := api.DeschedulerStrategy{ @@ -481,7 +482,7 @@ func TestLowNodeUtilization(t *testing.T) { }, }, } - LowNodeUtilization(ctx, fakeClient, strategy, nodes, false, podEvictor) + LowNodeUtilization(ctx, fakeClient, strategy, nodes, podEvictor) podsEvicted := podEvictor.TotalEvicted() if test.expectedPodsEvicted != podsEvicted { @@ -879,9 +880,10 @@ func TestWithTaints(t *testing.T) { false, item.evictionsExpected, item.nodes, + false, ) - LowNodeUtilization(ctx, &fake.Clientset{Fake: *fakePtr}, strategy, item.nodes, false, podEvictor) + LowNodeUtilization(ctx, &fake.Clientset{Fake: *fakePtr}, strategy, item.nodes, podEvictor) if item.evictionsExpected != evictionCounter { t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter) diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index ad860fe7f..4a489ddcf 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -30,7 +30,7 @@ import ( ) // RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity -func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { if strategy.Params == nil { klog.V(1).Infof("NodeAffinityType not set") return @@ -43,19 +43,21 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) + pods, err := podutil.ListPodsOnANode(ctx, client, node, func(pod *v1.Pod) bool { + return podEvictor.IsEvictable(pod) && + !nodeutil.PodFitsCurrentNode(pod, node) && + nodeutil.PodFitsAnyNode(pod, nodes) + }) if err != nil { klog.Errorf("failed to get pods from %v: %v", node.Name, err) } for _, pod := range pods { if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { - if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) { - klog.V(1).Infof("Evicting pod: %v", pod.Name) - if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { - klog.Errorf("Error evicting pod: (%#v)", err) - break - } + klog.V(1).Infof("Evicting pod: %v", pod.Name) + if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) + break } } } diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go index 997529425..da206790d 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ 
b/pkg/descheduler/strategies/node_affinity_test.go @@ -157,9 +157,10 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { false, tc.maxPodsToEvict, tc.nodes, + false, ) - RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, false, podEvictor) + RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index 2c1dc1989..690e41338 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -30,10 +30,10 @@ import ( ) // RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes -func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) + pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable) if err != nil { //no pods evicted as error encountered retrieving evictable Pods return diff --git a/pkg/descheduler/strategies/node_taint_test.go b/pkg/descheduler/strategies/node_taint_test.go index 46c89c5fd..d7f51fb8c 100644 --- a/pkg/descheduler/strategies/node_taint_test.go +++ b/pkg/descheduler/strategies/node_taint_test.go @@ -170,9 +170,10 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { false, tc.maxPodsToEvict, tc.nodes, + tc.evictLocalStoragePods, ) - RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, tc.evictLocalStoragePods, podEvictor) + RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, podEvictor) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount) diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index e5a04e834..fc43419bd 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -30,10 +30,10 @@ import ( ) // RemovePodsViolatingInterPodAntiAffinity evicts pods on the node which are having a pod affinity rules. 
-func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) + pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable) if err != nil { return } diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go index 4c4febda3..37fdf4617 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity_test.go +++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go @@ -84,9 +84,10 @@ func TestPodAntiAffinity(t *testing.T) { false, test.maxPodsToEvict, []*v1.Node{node}, + false, ) - RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, false, podEvictor) + RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, podEvictor) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != test.expectedEvictedPodCount { t.Errorf("Unexpected no of pods evicted: pods evicted: %d, expected: %d", podsEvicted, test.expectedEvictedPodCount) diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index 90ff1cf72..b1287abbf 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -30,7 +30,7 @@ import ( ) // PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. 
-func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { if strategy.Params == nil || strategy.Params.MaxPodLifeTimeSeconds == nil { klog.V(1).Infof("MaxPodLifeTimeSeconds not set") return @@ -38,7 +38,7 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D for _, node := range nodes { klog.V(1).Infof("Processing node: %#v", node.Name) - pods := listOldPodsOnNode(ctx, client, node, *strategy.Params.MaxPodLifeTimeSeconds, evictLocalStoragePods) + pods := listOldPodsOnNode(ctx, client, node, *strategy.Params.MaxPodLifeTimeSeconds, podEvictor) for _, pod := range pods { success, err := podEvictor.EvictPod(ctx, pod, node) if success { @@ -53,8 +53,8 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D } } -func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, maxAge uint, evictLocalStoragePods bool) []*v1.Pod { - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) +func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, maxAge uint, evictor *evictions.PodEvictor) []*v1.Pod { + pods, err := podutil.ListPodsOnANode(ctx, client, node, evictor.IsEvictable) if err != nil { return nil } diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go index bc12a9df0..b017d2205 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -157,9 +157,10 @@ func TestPodLifeTime(t *testing.T) { false, tc.maxPodsToEvict, []*v1.Node{node}, + false, ) - PodLifeTime(ctx, fakeClient, tc.strategy, []*v1.Node{node}, false, podEvictor) + PodLifeTime(ctx, fakeClient, tc.strategy, []*v1.Node{node}, podEvictor) podsEvicted := podEvictor.TotalEvicted() if podsEvicted != tc.expectedEvictedPodCount { t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", tc.description, tc.expectedEvictedPodCount, podsEvicted) diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index 3ac888142..5991e0393 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -31,14 +31,14 @@ import ( // RemovePodsHavingTooManyRestarts removes the pods that have too many restarts on node. // There are too many cases leading this issue: Volume mount failed, app error due to nodes' different settings. // As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages. 
-func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { +func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { if strategy.Params == nil || strategy.Params.PodsHavingTooManyRestarts == nil || strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold < 1 { klog.V(1).Infof("PodsHavingTooManyRestarts thresholds not set") return } for _, node := range nodes { klog.V(1).Infof("Processing node: %s", node.Name) - pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods) + pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable) if err != nil { klog.Errorf("Error when list pods at node %s", node.Name) continue diff --git a/pkg/descheduler/strategies/toomanyrestarts_test.go b/pkg/descheduler/strategies/toomanyrestarts_test.go index a37faad57..1c3541874 100644 --- a/pkg/descheduler/strategies/toomanyrestarts_test.go +++ b/pkg/descheduler/strategies/toomanyrestarts_test.go @@ -171,9 +171,10 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { false, tc.maxPodsToEvict, []*v1.Node{node}, + false, ) - RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, []*v1.Node{node}, false, podEvictor) + RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, []*v1.Node{node}, podEvictor) actualEvictedPodCount := podEvictor.TotalEvicted() if actualEvictedPodCount != tc.expectedEvictedPodCount { t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 5125f2bf1..b5c9280ab 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -29,6 +29,7 @@ import ( coreinformers "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" + "sigs.k8s.io/descheduler/cmd/descheduler/app/options" "sigs.k8s.io/descheduler/pkg/api" deschedulerapi "sigs.k8s.io/descheduler/pkg/api" @@ -132,9 +133,10 @@ func startEndToEndForLowNodeUtilization(ctx context.Context, clientset clientset false, 0, nodes, + false, ) - strategies.LowNodeUtilization(ctx, clientset, lowNodeUtilizationStrategy, nodes, false, podEvictor) + strategies.LowNodeUtilization(ctx, clientset, lowNodeUtilizationStrategy, nodes, podEvictor) time.Sleep(10 * time.Second) } @@ -150,6 +152,11 @@ func TestE2E(t *testing.T) { if err != nil { t.Errorf("Error listing node with %v", err) } + var nodes []*v1.Node + for i := range nodeList.Items { + node := nodeList.Items[i] + nodes = append(nodes, &node) + } sharedInformerFactory := informers.NewSharedInformerFactory(clientSet, 0) nodeInformer := sharedInformerFactory.Core().V1().Nodes() @@ -165,7 +172,7 @@ func TestE2E(t *testing.T) { if err != nil { t.Errorf("Error creating deployment %v", err) } - evictPods(ctx, t, clientSet, nodeInformer, nodeList, rc) + evictPods(ctx, t, clientSet, nodeInformer, nodes, rc) rc.Spec.Template.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} rc.Spec.Replicas = func(i int32) *int32 { return &i }(15) @@ -182,7 +189,7 @@ func TestE2E(t *testing.T) { if err != nil { t.Errorf("Error creating deployment %v", err) } - evictPods(ctx, t, clientSet, nodeInformer, nodeList, rc) + evictPods(ctx, t, clientSet, nodeInformer, nodes, rc) } func 
TestDeschedulingInterval(t *testing.T) { @@ -220,28 +227,40 @@ func TestDeschedulingInterval(t *testing.T) { } } -func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeList *v1.NodeList, rc *v1.ReplicationController) { - var leastLoadedNode v1.Node +func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeList []*v1.Node, rc *v1.ReplicationController) { + var leastLoadedNode *v1.Node podsBefore := math.MaxInt16 - for i := range nodeList.Items { + evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) + if err != nil || len(evictionPolicyGroupVersion) == 0 { + klog.Fatalf("%v", err) + } + podEvictor := evictions.NewPodEvictor( + clientSet, + evictionPolicyGroupVersion, + false, + 0, + nodeList, + true, + ) + for _, node := range nodeList { // Skip the Master Node - if _, exist := nodeList.Items[i].Labels["node-role.kubernetes.io/master"]; exist { + if _, exist := node.Labels["node-role.kubernetes.io/master"]; exist { continue } // List all the pods on the current Node - podsOnANode, err := podutil.ListEvictablePodsOnNode(ctx, clientSet, &nodeList.Items[i], true) + podsOnANode, err := podutil.ListPodsOnANode(ctx, clientSet, node, podEvictor.IsEvictable) if err != nil { t.Errorf("Error listing pods on a node %v", err) } // Update leastLoadedNode if necessary if tmpLoads := len(podsOnANode); tmpLoads < podsBefore { - leastLoadedNode = nodeList.Items[i] + leastLoadedNode = node podsBefore = tmpLoads } } t.Log("Eviction of pods starting") startEndToEndForLowNodeUtilization(ctx, clientSet, nodeInformer) - podsOnleastUtilizedNode, err := podutil.ListEvictablePodsOnNode(ctx, clientSet, &leastLoadedNode, true) + podsOnleastUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, leastLoadedNode, podEvictor.IsEvictable) if err != nil { t.Errorf("Error listing pods on a node %v", err) }
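A few usage sketches of the refactored APIs in this patch follow, for reference. The `sketch` package and all function names below are illustrative only, not part of the descheduler; the calls themselves (NewPodEvictor, ListPodsOnANode, IsEvictable, EvictPod) are taken from the hunks above. First, the shape most strategies take after this change: list pods per node with IsEvictable as the filter, then evict whatever comes back, as RemovePodsViolatingNodeTaints and RemovePodsHavingTooManyRestarts now do.

package sketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/klog"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// evictAllEvictablePods lists the pods on each node using podEvictor.IsEvictable as the
// filter argument of ListPodsOnANode (replacing the removed ListEvictablePodsOnNode),
// then asks the evictor to evict each one. The evict-local-storage-pods setting no
// longer has to be threaded through the strategy signature; it lives on the PodEvictor.
func evictAllEvictablePods(ctx context.Context, client clientset.Interface, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
	for _, node := range nodes {
		pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable)
		if err != nil {
			klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
			continue
		}
		for _, pod := range pods {
			if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
				klog.Errorf("error evicting pod %s/%s: %#v", pod.Namespace, pod.Name, err)
				break
			}
		}
	}
}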
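Strategies that need more than plain evictability compose their own predicate on top of IsEvictable, which is what the RemovePodsViolatingNodeAffinity hunk does with PodFitsCurrentNode and PodFitsAnyNode. Continuing the same illustrative file, with a simplified affinity check standing in for that node-fit logic:

// listPodsViolatingSomeRule wraps IsEvictable in a closure so the single
// ListPodsOnANode call both gates on evictability and applies a strategy-specific check.
// The affinity condition here is a stand-in, not the real node-fit predicate.
func listPodsViolatingSomeRule(ctx context.Context, client clientset.Interface, node *v1.Node, podEvictor *evictions.PodEvictor) ([]*v1.Pod, error) {
	return podutil.ListPodsOnANode(ctx, client, node, func(pod *v1.Pod) bool {
		return podEvictor.IsEvictable(pod) &&
			pod.Spec.Affinity != nil &&
			pod.Spec.Affinity.NodeAffinity != nil
	})
}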
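The comment block moved into RemoveDuplicatePods describes reducing each pod to a sorted list of Namespace/Kind/Name/Image keys; two pods with identical lists are treated as duplicates. A standalone sketch of that keying step, assuming the same illustrative file plus the standard sort and strings imports; the helper name is hypothetical:

// podDuplicateKeys builds the sorted key list described in the duplicate-detection
// comment. Each owner reference is paired with each container image in the pod.
func podDuplicateKeys(pod *v1.Pod) []string {
	ownerRefList := podutil.OwnerRef(pod)
	keys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
	for _, ownerRef := range ownerRefList {
		for _, container := range pod.Spec.Containers {
			// Namespace/Kind/Name is unique within the cluster; the image distinguishes
			// pods that share an owner but serve different purposes.
			keys = append(keys, strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/"))
		}
	}
	sort.Strings(keys)
	return keys
}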
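Finally, the new TestIsEvictable cases exercise the descheduler.alpha.kubernetes.io/evict annotation, which overrides every built-in check in IsEvictable (critical pod, DaemonSet owner, missing owner refs, local storage, mirror pod). A small sketch of applying that override, again with a hypothetical helper name:

// forceEvictable adds the evict annotation so IsEvictable returns true even when
// the pod would otherwise fail the built-in checks.
func forceEvictable(pod *v1.Pod) {
	if pod.Annotations == nil {
		pod.Annotations = map[string]string{}
	}
	pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
}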