diff --git a/README.md b/README.md index 8d09851f5..068e45a48 100644 --- a/README.md +++ b/README.md @@ -226,10 +226,10 @@ If that parameter is set to `true`, the thresholds are considered as percentage `thresholds` will be deducted from the mean among all nodes and `targetThresholds` will be added to the mean. A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). -**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage. -This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same -design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands -like `kubectl top`) may differ from the calculated consumption, due to these components reporting +**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage. +This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same +design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands +like `kubectl top`) may differ from the calculated consumption, due to these components reporting actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project. **Parameters:** @@ -280,8 +280,8 @@ under utilized frequently or for a short period of time. By default, `numberOfNo ### HighNodeUtilization -This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be -scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help +This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be +scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help trigger down scaling of under utilized nodes. This strategy **must** be used with the scheduler scoring strategy `MostAllocated`. The parameters of this strategy are configured under `nodeResourceUtilizationThresholds`. @@ -300,10 +300,10 @@ strategy evicts pods from `underutilized nodes` (those with usage below `thresho so that they can be recreated in appropriately utilized nodes. The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero. -**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage. -This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same -design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands -like `kubectl top`) may differ from the calculated consumption, due to these components reporting +**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage. +This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same +design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands +like `kubectl top`) may differ from the calculated consumption, due to these components reporting actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project. 
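For illustration, the requests-based accounting described in the note above can be sketched as follows. This is a minimal, hypothetical Go example and not part of the descheduler codebase: the `requestedOnNode` helper name is invented, and init containers and pod overhead are ignored for brevity.

```go
// Sketch: a node's "consumption" is the sum of the CPU/memory *requests* of the
// pods assigned to it. Live usage reported by the kubelet is not consulted.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// requestedOnNode adds up the container resource requests of the given pods.
func requestedOnNode(pods []*v1.Pod) v1.ResourceList {
	cpu := resource.NewMilliQuantity(0, resource.DecimalSI)
	mem := resource.NewQuantity(0, resource.BinarySI)
	for _, pod := range pods {
		for _, c := range pod.Spec.Containers {
			cpu.Add(*c.Resources.Requests.Cpu())
			mem.Add(*c.Resources.Requests.Memory())
		}
	}
	return v1.ResourceList{v1.ResourceCPU: *cpu, v1.ResourceMemory: *mem}
}

func main() {
	web := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
		Name: "web",
		Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
			v1.ResourceCPU:    resource.MustParse("500m"),
			v1.ResourceMemory: resource.MustParse("256Mi"),
		}},
	}}}}
	// Requests, not kubectl-top usage, drive the utilization numbers.
	fmt.Println(requestedOnNode([]*v1.Pod{web}))
}
```

Summing requests this way keeps the descheduler's view of a node consistent with the kube-scheduler's, even when `kubectl top` reports different numbers.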
**Parameters:** @@ -737,8 +737,9 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`: - A `nodeSelector` on the pod -- Any `Tolerations` on the pod and any `Taints` on the other nodes +- Any `tolerations` on the pod and any `taints` on the other nodes - `nodeAffinity` on the pod +- Resource `requests` made by the pod and the resources available on other nodes - Whether any of the other nodes are marked as `unschedulable` E.g. diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 33def9534..f1bd61a81 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -283,6 +283,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer deschedulerPolicy.MaxNoOfPodsToEvictPerNode, deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace, nodes, + getPodsAssignedToNode, evictLocalStoragePods, evictSystemCriticalPods, ignorePvcPods, diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 0c3974e06..742388aba 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -51,6 +51,7 @@ type namespacePodEvictCount map[string]uint type PodEvictor struct { client clientset.Interface nodes []*v1.Node + nodeIndexer podutil.GetPodsAssignedToNodeFunc policyGroupVersion string dryRun bool maxPodsToEvictPerNode *uint @@ -71,6 +72,7 @@ func NewPodEvictor( maxPodsToEvictPerNode *uint, maxPodsToEvictPerNamespace *uint, nodes []*v1.Node, + nodeIndexer podutil.GetPodsAssignedToNodeFunc, evictLocalStoragePods bool, evictSystemCriticalPods bool, ignorePvcPods bool, @@ -87,6 +89,7 @@ func NewPodEvictor( return &PodEvictor{ client: client, nodes: nodes, + nodeIndexer: nodeIndexer, policyGroupVersion: policyGroupVersion, dryRun: dryRun, maxPodsToEvictPerNode: maxPodsToEvictPerNode, @@ -296,7 +299,7 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable { } if options.nodeFit { ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { - if !nodeutil.PodFitsAnyOtherNode(pod, pe.nodes) { + if !nodeutil.PodFitsAnyOtherNode(pe.nodeIndexer, pod, pe.nodes) { return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable") } return nil diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index d7e8b46dd..131349f41 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -21,8 +21,10 @@ import ( "testing" v1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" @@ -80,9 +82,9 @@ func TestIsEvictable(t *testing.T) { nodeLabelKey := "datacenter" nodeLabelValue := "east" type testCase struct { - pod *v1.Pod + description string + pods []*v1.Pod nodes []*v1.Node - runBefore func(*v1.Pod, []*v1.Node) evictFailedBarePods bool evictLocalStoragePods bool evictSystemCriticalPods bool @@ -92,261 +94,309 @@ func TestIsEvictable(t *testing.T) { } testCases 
:= []testCase{ - { // Failed pod eviction with no ownerRefs. - pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Status.Phase = v1.PodFailed + { + description: "Failed pod eviction with no ownerRefs", + pods: []*v1.Pod{ + test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Status.Phase = v1.PodFailed + }), }, evictFailedBarePods: false, result: false, - }, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled - pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - }, + }, { + description: "Normal pod eviction with no ownerRefs and evictFailedBarePods enabled", + pods: []*v1.Pod{test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil)}, evictFailedBarePods: true, result: false, - }, { // Failed pod eviction with no ownerRefs - pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Status.Phase = v1.PodFailed + }, { + description: "Failed pod eviction with no ownerRefs", + pods: []*v1.Pod{ + test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Status.Phase = v1.PodFailed + }), }, evictFailedBarePods: true, result: true, - }, { // Normal pod eviction with normal ownerRefs - pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, { + description: "Normal pod eviction with normal ownerRefs", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }, { + description: "Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Normal pod eviction with replicaSet ownerRefs - pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }, { + description: "Normal pod eviction with replicaSet ownerRefs", + pods: []*v1.Pod{ + test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences =
test.GetReplicaSetOwnerRefList() + }, { + description: "Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p4", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Normal pod eviction with statefulSet ownerRefs - pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + }, { + description: "Normal pod eviction with statefulSet ownerRefs", + pods: []*v1.Pod{ + test.BuildTestPod("p18", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + }, { + description: "Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p19", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false - pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, { + description: "Pod not evicted because it is bound to a PV and evictLocalStoragePods = false", + pods: []*v1.Pod{ + test.BuildTestPod("p5", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, }, - }, - } + } + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: false, - }, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true - pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, { + description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods 
= true", + pods: []*v1.Pod{ + test.BuildTestPod("p6", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, }, - }, - } + } + }), }, evictLocalStoragePods: true, evictSystemCriticalPods: false, result: true, - }, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Volumes = []v1.Volume{ - { - Name: "sample", - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, - EmptyDir: &v1.EmptyDirVolumeSource{ - SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, { + description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, }, - }, - } + } + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Pod not evicted becasuse it is part of a daemonSet - pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, { + description: "Pod not evicted becasuse it is part of a daemonSet", + pods: []*v1.Pod{ + test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: false, - }, { // Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }, { + description: "Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Pod not evicted becasuse it is a mirror pod - pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences 
= test.GetNormalPodOwnerRefList() - pod.Annotations = test.GetMirrorPodAnnotation() + }, { + description: "Pod not evicted becasuse it is a mirror poddsa", + pods: []*v1.Pod{ + test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = test.GetMirrorPodAnnotation() + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: false, - }, { // Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Annotations = test.GetMirrorPodAnnotation() - pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" + }, { + description: "Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = test.GetMirrorPodAnnotation() + pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true" + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Pod not evicted becasuse it has system critical priority - pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority + }, { + description: "Pod not evicted becasuse it has system critical priority", + pods: []*v1.Pod{ + test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: false, - }, { // Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority - pod.Annotations = map[string]string{ - "descheduler.alpha.kubernetes.io/evict": "true", - } + }, { + description: "Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + pod.Annotations = map[string]string{ + "descheduler.alpha.kubernetes.io/evict": "true", + } + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, result: true, - }, { // Pod not evicted becasuse it has a priority higher than the configured priority threshold - pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Priority = &highPriority + }, { + description: "Pod not evicted becasuse it has a priority higher than the configured priority threshold", + pods: []*v1.Pod{ + test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) { + 
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Priority = &highPriority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, priorityThreshold: &lowPriority, result: false, - }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.Spec.Priority = &highPriority + }, { + description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.Spec.Priority = &highPriority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, priorityThreshold: &lowPriority, result: true, - }, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true - pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority + }, { + description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true", + pods: []*v1.Pod{ + test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: true, result: true, - }, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - priority := utils.SystemCriticalPriority - pod.Spec.Priority = &priority + }, { + description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: true, result: true, - }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true - pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Spec.Priority = &highPriority + }, { + description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but 
evictSystemCriticalPods = true", + pods: []*v1.Pod{ + test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Priority = &highPriority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: true, priorityThreshold: &lowPriority, result: true, - }, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation - pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil), - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} - pod.Spec.Priority = &highPriority + }, { + description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation", + pods: []*v1.Pod{ + test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"} + pod.Spec.Priority = &highPriority + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: true, priorityThreshold: &lowPriority, result: true, - }, { // Pod with no tolerations running on normal node, all other nodes tainted - pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil), - nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - - for _, node := range nodes { + }, { + description: "Pod with no tolerations running on normal node, all other nodes tainted", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) { node.Spec.Taints = []v1.Taint{ { Key: nodeTaintKey, @@ -354,27 +404,8 @@ func TestIsEvictable(t *testing.T) { Effect: v1.TaintEffectNoSchedule, }, } - } - }, - evictLocalStoragePods: false, - evictSystemCriticalPods: false, - nodeFit: true, - result: false, - }, { // Pod with correct tolerations running on normal node, all other nodes tainted - pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { - pod.Spec.Tolerations = []v1.Toleration{ - { - Key: nodeTaintKey, - Value: nodeTaintValue, - Effect: v1.TaintEffectNoSchedule, - }, - } - }), - nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - - for _, node := range nodes { + }), + test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) { node.Spec.Taints = []v1.Taint{ { Key: nodeTaintKey, @@ -382,81 +413,259 @@ func TestIsEvictable(t *testing.T) { Effect: v1.TaintEffectNoSchedule, }, } - } - }, - evictLocalStoragePods: false, - evictSystemCriticalPods: false, - nodeFit: true, - result: true, - }, { // Pod with incorrect node selector - pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { - pod.Spec.NodeSelector = map[string]string{ - nodeLabelKey: "fail", - } - }), - nodes: 
[]*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - - for _, node := range nodes { - node.ObjectMeta.Labels = map[string]string{ - nodeLabelKey: nodeLabelValue, - } - } + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, nodeFit: true, result: false, - }, { // Pod with correct node selector - pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { - pod.Spec.NodeSelector = map[string]string{ - nodeLabelKey: nodeLabelValue, - } - }), - nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)}, - runBefore: func(pod *v1.Pod, nodes []*v1.Node) { - pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() - - for _, node := range nodes { - node.ObjectMeta.Labels = map[string]string{ - nodeLabelKey: nodeLabelValue, + }, { + description: "Pod with correct tolerations running on normal node, all other nodes tainted", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Tolerations = []v1.Toleration{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, } - } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) { + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) { + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), }, evictLocalStoragePods: false, evictSystemCriticalPods: false, nodeFit: true, result: true, + }, { + description: "Pod with incorrect node selector", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: "fail", + } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + nodeFit: true, + result: false, + }, { + description: "Pod with correct node selector", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + nodeFit: true, + result: true, + }, { + description: "Pod with correct node selector, but only available node doesn't have enough CPU", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 12, 8, n1.Name, func(pod 
*v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2-TEST", 10, 16, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestNode("node3-TEST", 10, 16, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + nodeFit: true, + result: false, + }, { + description: "Pod with correct node selector, and one node has enough memory", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestPod("node2-pod-10GB-mem", 20, 10, "node2", func(pod *v1.Pod) { + pod.ObjectMeta.Labels = map[string]string{ + "test": "true", + } + }), + test.BuildTestPod("node3-pod-10GB-mem", 20, 10, "node3", func(pod *v1.Pod) { + pod.ObjectMeta.Labels = map[string]string{ + "test": "true", + } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestNode("node3", 100, 20, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + nodeFit: true, + result: true, + }, { + description: "Pod with correct node selector, but both nodes don't have enough memory", + pods: []*v1.Pod{ + test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestPod("node2-pod-10GB-mem", 10, 10, "node2", func(pod *v1.Pod) { + pod.ObjectMeta.Labels = map[string]string{ + "test": "true", + } + }), + test.BuildTestPod("node3-pod-10GB-mem", 10, 10, "node3", func(pod *v1.Pod) { + pod.ObjectMeta.Labels = map[string]string{ + "test": "true", + } + }), + }, + nodes: []*v1.Node{ + test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + test.BuildTestNode("node3", 100, 16, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), + }, + evictLocalStoragePods: false, + evictSystemCriticalPods: false, + nodeFit: true, + result: false, }, } for _, test := range testCases { - test.runBefore(test.pod, test.nodes) - nodes := append(test.nodes, n1) - podEvictor := &PodEvictor{ - evictLocalStoragePods: test.evictLocalStoragePods, - evictSystemCriticalPods: test.evictSystemCriticalPods, - evictFailedBarePods: test.evictFailedBarePods, - nodes: nodes, - } + t.Run(test.description, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - evictable := podEvictor.Evictable() - var opts []func(opts *Options) - if test.priorityThreshold != nil { - opts = append(opts, WithPriorityThreshold(*test.priorityThreshold)) - } - if test.nodeFit { - opts = append(opts, WithNodeFit(true)) - } - evictable = podEvictor.Evictable(opts...) 
+ nodes := append(test.nodes, n1) - result := evictable.IsEvictable(test.pod) - if result != test.result { - t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result) - } + var objs []runtime.Object + for _, node := range test.nodes { + objs = append(objs, node) + } + for _, pod := range test.pods { + objs = append(objs, pod) + } + fakeClient := fake.NewSimpleClientset(objs...) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + podInformer := sharedInformerFactory.Core().V1().Pods() + + getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) + if err != nil { + t.Errorf("Build get pods assigned to node function error: %v", err) + } + + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + podEvictor := &PodEvictor{ + client: fakeClient, + nodes: nodes, + nodeIndexer: getPodsAssignedToNode, + policyGroupVersion: policyv1.SchemeGroupVersion.String(), + dryRun: false, + maxPodsToEvictPerNode: nil, + maxPodsToEvictPerNamespace: nil, + evictLocalStoragePods: test.evictLocalStoragePods, + evictSystemCriticalPods: test.evictSystemCriticalPods, + evictFailedBarePods: test.evictFailedBarePods, + } + + var opts []func(opts *Options) + if test.priorityThreshold != nil { + opts = append(opts, WithPriorityThreshold(*test.priorityThreshold)) + } + if test.nodeFit { + opts = append(opts, WithNodeFit(true)) + } + evictable := podEvictor.Evictable(opts...) + + result := evictable.IsEvictable(test.pods[0]) + if result != test.result { + t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pods[0].Name, test.result, result) + } + }) } } func TestPodTypes(t *testing.T) { diff --git a/pkg/descheduler/node/node.go b/pkg/descheduler/node/node.go index 1c21e060c..e95606325 100644 --- a/pkg/descheduler/node/node.go +++ b/pkg/descheduler/node/node.go @@ -18,13 +18,16 @@ package node import ( "context" + "fmt" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" coreinformers "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" "sigs.k8s.io/descheduler/pkg/utils" ) @@ -96,32 +99,90 @@ func IsReady(node *v1.Node) bool { return true } -// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node -// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector -// matching the node label (including affinity), the taints on the node, and the node being schedulable or not. -func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool { +// NodeFit returns true if the provided pod can be scheduled onto the provided node. +// This function is used when the NodeFit pod filtering feature of the Descheduler is enabled. +// This function currently considers a subset of the Kubernetes Scheduler's predicates when +// deciding if a pod would fit on a node, but more predicates may be added in the future. 
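+// It returns the list of reasons why the pod does not fit; an empty list means the pod fits.
+// The predicates currently checked are the node selector and required node affinity, taints and tolerations, resource requests, and node schedulability.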
+func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error { + // Check node selector and required affinity + var errors []error + if ok, err := utils.PodMatchNodeSelector(pod, node); err != nil { + errors = append(errors, err) + } else if !ok { + errors = append(errors, fmt.Errorf("pod node selector does not match the node label")) + } + // Check taints (we only care about NoSchedule and NoExecute taints) + ok := utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool { + return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute + }) + if !ok { + errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node")) + } + // Check if the pod can fit on a node based off it's requests + ok, reqErrors := fitsRequest(nodeIndexer, pod, node) + if !ok { + errors = append(errors, reqErrors...) + } + // Check if node is schedulable + if IsNodeUnschedulable(node) { + errors = append(errors, fmt.Errorf("node is not schedulable")) + } + return errors +} + +// PodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node +// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function. +func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool { for _, node := range nodes { // Skip node pod is already on if node.Name == pod.Spec.NodeName { continue } - // Check node selector and required affinity - ok, err := utils.PodMatchNodeSelector(pod, node) - if err != nil || !ok { - continue - } - // Check taints (we only care about NoSchedule and NoExecute taints) - ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool { - return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute - }) - if !ok { - continue - } - // Check if node is schedulable - if !IsNodeUnschedulable(node) { - klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + + errors := NodeFit(nodeIndexer, pod, node) + if len(errors) == 0 { + klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) return true + } else { + klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + for _, err := range errors { + klog.V(4).InfoS(err.Error()) + } + } + } + return false +} + +// PodFitsAnyNode checks if the given pod will fit any of the given nodes. The predicates used +// to determine if the pod will fit can be found in the NodeFit function. +func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool { + for _, node := range nodes { + errors := NodeFit(nodeIndexer, pod, node) + if len(errors) == 0 { + klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + return true + } else { + klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + for _, err := range errors { + klog.V(4).InfoS(err.Error()) + } + } + } + return false +} + +// PodFitsCurrentNode checks if the given pod will fit onto the given node. The predicates used +// to determine if the pod will fit can be found in the NodeFit function. 
+func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool { + errors := NodeFit(nodeIndexer, pod, node) + if len(errors) == 0 { + klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + return true + } else { + klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) + for _, err := range errors { + klog.V(4).InfoS(err.Error()) } } return false @@ -133,39 +194,95 @@ func IsNodeUnschedulable(node *v1.Node) bool { return node.Spec.Unschedulable } -// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on -// multiple criteria, like, pod node selector matching the node label, node -// being schedulable or not. -func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool { - for _, node := range nodes { +// fitsRequest determines if a pod can fit on a node based on its resource requests. It returns true if +// the pod will fit. +func fitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) { + var insufficientResources []error - ok, err := utils.PodMatchNodeSelector(pod, node) - if err != nil || !ok { - continue - } - if !IsNodeUnschedulable(node) { - klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node)) - return true - } + // Get pod requests + podRequests, _ := utils.PodRequestsAndLimits(pod) + resourceNames := make([]v1.ResourceName, 0, len(podRequests)) + for name := range podRequests { + resourceNames = append(resourceNames, name) } - return false -} - -// PodFitsCurrentNode checks if the given pod fits on the given node if the pod -// node selector matches the node label. -func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool { - ok, err := utils.PodMatchNodeSelector(pod, node) + availableResources, err := nodeAvailableResources(nodeIndexer, node, resourceNames) if err != nil { - klog.ErrorS(err, "Failed to match node selector") - return false + return false, []error{err} } - if !ok { - klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) - return false + podFitsOnNode := true + for _, resource := range resourceNames { + podResourceRequest := podRequests[resource] + availableResource, ok := availableResources[resource] + if !ok || podResourceRequest.MilliValue() > availableResource.MilliValue() { + insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource)) + podFitsOnNode = false + } + } + return podFitsOnNode, insufficientResources +} + +// nodeAvailableResources returns resources mapped to the quanitity available on the node. 
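+// The available quantity is the node's allocatable capacity minus the summed requests of the pods currently assigned to the node, as listed through the nodeIndexer.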
+func nodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) { + podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil) + if err != nil { + return nil, err + } + nodeUtilization := NodeUtilization(podsOnNode, resourceNames) + remainingResources := map[v1.ResourceName]*resource.Quantity{ + v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-nodeUtilization[v1.ResourceCPU].MilliValue(), resource.DecimalSI), + v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-nodeUtilization[v1.ResourceMemory].Value(), resource.BinarySI), + v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-nodeUtilization[v1.ResourcePods].Value(), resource.DecimalSI), + } + for _, name := range resourceNames { + if !IsBasicResource(name) { + if _, exists := node.Status.Allocatable[name]; exists { + allocatableResource := node.Status.Allocatable[name] + remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-nodeUtilization[name].Value(), resource.DecimalSI) + } else { + remainingResources[name] = resource.NewQuantity(0, resource.DecimalSI) + } + } + } + + return remainingResources, nil +} + +// NodeUtilization returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated. +func NodeUtilization(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity { + totalReqs := map[v1.ResourceName]*resource.Quantity{ + v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI), + v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI), + v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI), + } + for _, name := range resourceNames { + if !IsBasicResource(name) { + totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI) + } + } + + for _, pod := range pods { + req, _ := utils.PodRequestsAndLimits(pod) + for _, name := range resourceNames { + quantity, ok := req[name] + if ok && name != v1.ResourcePods { + // As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero, + // the format of the quantity will be updated to the format of y. + totalReqs[name].Add(quantity) + } + } + } + + return totalReqs +} + +// IsBasicResource checks if resource is basic native. 
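+// CPU, memory, and pods count as basic; any other resource name is looked up individually in node.Status.Allocatable by nodeAvailableResources.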
+func IsBasicResource(name v1.ResourceName) bool { + switch name { + case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods: + return true + default: + return false } - - klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node)) - return true } diff --git a/pkg/descheduler/node/node_test.go b/pkg/descheduler/node/node_test.go index fc73c88c4..bd22542ec 100644 --- a/pkg/descheduler/node/node_test.go +++ b/pkg/descheduler/node/node_test.go @@ -21,9 +21,12 @@ import ( "testing" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" "sigs.k8s.io/descheduler/test" ) @@ -147,13 +150,13 @@ func TestPodFitsCurrentNode(t *testing.T) { }, }, }, - node: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, - }, - }, - }, + node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), success: true, }, { @@ -181,27 +184,48 @@ func TestPodFitsCurrentNode(t *testing.T) { }, }, }, - node: &v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - nodeLabelKey: "no", - }, - }, - }, + node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "no", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), success: false, }, } for _, tc := range tests { - actual := PodFitsCurrentNode(tc.pod, tc.node) - if actual != tc.success { - t.Errorf("Test %#v failed", tc.description) - } + t.Run(tc.description, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var objs []runtime.Object + objs = append(objs, tc.node) + objs = append(objs, tc.pod) + + fakeClient := fake.NewSimpleClientset(objs...) 
+ + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + podInformer := sharedInformerFactory.Core().V1().Pods() + + getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) + if err != nil { + t.Errorf("Build get pods assigned to node function error: %v", err) + } + + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node) + if actual != tc.success { + t.Errorf("Test %#v failed", tc.description) + } + }) } } func TestPodFitsAnyOtherNode(t *testing.T) { - nodeLabelKey := "kubernetes.io/desiredNode" nodeLabelValue := "yes" nodeTaintKey := "hardware" @@ -215,238 +239,527 @@ func TestPodFitsAnyOtherNode(t *testing.T) { pod *v1.Pod nodes []*v1.Node success bool + podsOnNodes []*v1.Pod }{ { description: "Pod fits another node matching node affinity", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], - Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], - Labels: map[string]string{ - nodeLabelKey: "no", - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "no", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), }, - success: true, + podsOnNodes: []*v1.Pod{}, + success: true, }, { description: "Pod expected to fit one of the nodes", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], - Labels: map[string]string{ - nodeLabelKey: "no", - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], - Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "no", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), }, - success: true, + podsOnNodes: []*v1.Pod{}, + success: true, }, { description: "Pod 
expected to fit none of the nodes", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], - Labels: map[string]string{ - nodeLabelKey: "unfit1", - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], - Labels: map[string]string{ - nodeLabelKey: "unfit2", - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "unfit1", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "unfit2", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), }, - success: false, + podsOnNodes: []*v1.Pod{}, + success: false, }, { description: "Nodes are unschedulable but labels match, should fail", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], - Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, - }, - }, - Spec: v1.NodeSpec{ - Unschedulable: true, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], - Labels: map[string]string{ - nodeLabelKey: "no", - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Unschedulable = true + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: "no", + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), }, - success: false, + podsOnNodes: []*v1.Pod{}, + success: false, }, { - description: "Two nodes matches node selector, one of them is tained, should pass", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + description: "Both nodes are tained, should fail", + pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + 
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{}, + success: false, + }, + { + description: "Two nodes matches node selector, one of them is tained, there is a pod on the available node, and requests are low, should pass", + pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, + "test": "true", }, - }, - Spec: v1.NodeSpec{ - Taints: []v1.Taint{ - { - Key: nodeTaintKey, - Value: nodeTaintValue, - Effect: v1.TaintEffectNoSchedule, - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], - Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI) + }), }, success: true, }, { - description: "Both nodes are tained, should fail", - pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue), + description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail", + pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), nodes: []*v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[0], + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + 
node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + // Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores + test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, + "test": "true", }, - }, - Spec: v1.NodeSpec{ - Taints: []v1.Taint{ - { - Key: nodeTaintKey, - Value: nodeTaintValue, - Effect: v1.TaintEffectNoSchedule, - }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + }, + success: false, + }, + { + description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail", + pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[1], + } + }), + // Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB + test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", Labels: map[string]string{ - nodeLabelKey: nodeLabelValue, + "test": "true", }, - }, - Spec: v1.NodeSpec{ - Taints: []v1.Taint{ - { - Key: nodeTaintKey, - Value: nodeTaintValue, - Effect: v1.TaintEffectNoExecute, - }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + }, + success: false, + }, + { + description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail", + pod: test.BuildTestPod("p1", 
2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: nodeNames[2], - }, - }, + } + }), + // Notice that this node only has 20GB of storage, the pod already on the node below requests 11GB, and the pod above requests 10GB + test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", + Labels: map[string]string{ + "test": "true", + }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI) + }), + }, + success: false, + }, + { + description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail", + pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI) + }), + // Notice that this node only has 15 of the custom resource, the pod already on the node below requests 10, and the pod above requests 10 + test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI) + node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = 
metav1.ObjectMeta{ + Namespace: "test", + Labels: map[string]string{ + "test": "true", + }, + } + pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI) + }), + }, + success: false, + }, + { + description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass", + pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + // Notice that this node has 5 CPU cores, the pod below requests 2 cores, and has CPU overhead of 1 cores, and the pod above requests 1 core + test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: []*v1.Pod{ + test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", + Labels: map[string]string{ + "test": "true", + }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000) + }), + }, + success: true, + }, + { + description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail", + pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) { + pod.Spec.NodeSelector = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + }), + nodes: []*v1.Node{ + test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI) + node.Spec.Taints = []v1.Taint{ + { + Key: nodeTaintKey, + Value: nodeTaintValue, + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + // Notice that this node only has 5 CPU cores, the pod below requests 2 cores, but has CPU overhead of 2 cores, and the pod above requests 2 cores + test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + nodeLabelKey: nodeLabelValue, + } + + node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI) + }), + test.BuildTestNode(nodeNames[2], 0, 0, 0, nil), + }, + podsOnNodes: 
[]*v1.Pod{ + test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) { + pod.ObjectMeta = metav1.ObjectMeta{ + Namespace: "test", + Labels: map[string]string{ + "test": "true", + }, + } + pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI) + pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000) + }), }, success: false, }, } for _, tc := range tests { - actual := PodFitsAnyOtherNode(tc.pod, tc.nodes) - if actual != tc.success { - t.Errorf("Test %#v failed", tc.description) - } + t.Run(tc.description, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var objs []runtime.Object + for _, node := range tc.nodes { + objs = append(objs, node) + } + for _, pod := range tc.podsOnNodes { + objs = append(objs, pod) + } + objs = append(objs, tc.pod) + + fakeClient := fake.NewSimpleClientset(objs...) + + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + podInformer := sharedInformerFactory.Core().V1().Pods() + + getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) + if err != nil { + t.Errorf("Build get pods assigned to node function error: %v", err) + } + + sharedInformerFactory.Start(ctx.Done()) + sharedInformerFactory.WaitForCacheSync(ctx.Done()) + + actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes) + if actual != tc.success { + t.Errorf("Test %#v failed", tc.description) + } + }) } } -func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod { - return (&v1.Pod{ - Spec: v1.PodSpec{ - NodeName: nodeName, - Affinity: &v1.Affinity{ - NodeAffinity: &v1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ - NodeSelectorTerms: []v1.NodeSelectorTerm{ - { - MatchExpressions: []v1.NodeSelectorRequirement{ - { - Key: nodeSelectorKey, - Operator: "In", - Values: []string{ - nodeSelectorValue, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }) +// createResourceList builds a small resource list of core resources +func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList { + resourceList := make(map[v1.ResourceName]resource.Quantity) + resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI) + resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI) + resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI) + return resourceList } diff --git a/pkg/descheduler/strategies/duplicates_test.go b/pkg/descheduler/strategies/duplicates_test.go index f2b6467d3..488bc4a3e 100644 --- a/pkg/descheduler/strategies/duplicates_test.go +++ b/pkg/descheduler/strategies/duplicates_test.go @@ -67,6 +67,7 @@ func TestFindDuplicatePods(t *testing.T) { Unschedulable: true, } }) + node6 := test.BuildTestNode("n6", 200, 200, 10, nil) p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p1.Namespace = "dev" @@ -102,6 +103,14 @@ func TestFindDuplicatePods(t *testing.T) { p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil) p18.Namespace = "node-fit" + // This pod sits on node6 and is used to take up CPU requests on the node + p19 := test.BuildTestPod("CPU-eater", 150, 150, node6.Name, nil) + p19.Namespace = "test" + + // Dummy pod for node6 used to do the opposite of p19 + p20 := test.BuildTestPod("CPU-saver", 100, 150, node6.Name, nil) + p20.Namespace = "test" + // ### 
Evictable Pods ### // Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted. @@ -263,6 +272,20 @@ func TestFindDuplicatePods(t *testing.T) { expectedEvictedPodCount: 0, strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, }, + { + description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available does not have enough CPU, and nodeFit set to true. 0 should be evicted.", + pods: []*v1.Pod{p1, p2, p3, p19}, + nodes: []*v1.Node{node1, node6}, + expectedEvictedPodCount: 0, + strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, + }, + { + description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available has enough CPU, and nodeFit set to true. 1 should be evicted.", + pods: []*v1.Pod{p1, p2, p3, p20}, + nodes: []*v1.Node{node1, node6}, + expectedEvictedPodCount: 1, + strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}}, + }, } for _, testCase := range testCases { @@ -297,6 +320,7 @@ func TestFindDuplicatePods(t *testing.T) { nil, nil, testCase.nodes, + getPodsAssignedToNode, false, false, false, @@ -724,6 +748,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { nil, nil, testCase.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/descheduler/strategies/failedpods_test.go index 367418499..92b3fdf73 100644 --- a/pkg/descheduler/strategies/failedpods_test.go +++ b/pkg/descheduler/strategies/failedpods_test.go @@ -166,9 +166,12 @@ func TestRemoveFailedPods(t *testing.T) { { description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction", strategy: createStrategy(true, false, nil, nil, nil, true), - nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) { - node.Spec.Unschedulable = true - })}, + nodes: []*v1.Node{ + test.BuildTestNode("node1", 2000, 3000, 10, nil), + test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) { + node.Spec.Unschedulable = true + }), + }, expectedEvictedPodCount: 0, pods: []*v1.Pod{ buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{ @@ -176,6 +179,17 @@ func TestRemoveFailedPods(t *testing.T) { }), nil), }, }, + { + description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction", + strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, true), + nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)}, + expectedEvictedPodCount: 0, + pods: []*v1.Pod{ + buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"}, + }), nil), + }, + }, { description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction", strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false), @@ -261,6 +275,7 @@ func TestRemoveFailedPods(t *testing.T) { nil, nil, tc.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index c768e6b78..832a1b6d4 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -95,8 +95,8 @@ func RemovePodsViolatingNodeAffinity(ctx 
context.Context, client clientset.Inter getPodsAssignedToNode, podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { return evictable.IsEvictable(pod) && - !nodeutil.PodFitsCurrentNode(pod, node) && - nodeutil.PodFitsAnyNode(pod, nodes) + !nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, node) && + nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes) }), ) if err != nil { diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go index df2b636e1..53f45d67c 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ b/pkg/descheduler/strategies/node_affinity_test.go @@ -222,6 +222,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, tc.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/node_taint_test.go b/pkg/descheduler/strategies/node_taint_test.go index 45a7cea58..ae2d06d2e 100644 --- a/pkg/descheduler/strategies/node_taint_test.go +++ b/pkg/descheduler/strategies/node_taint_test.go @@ -76,6 +76,11 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { createPreferNoScheduleTaint("testTaint", "test", 1), } + node6 := test.BuildTestNode("n6", 1, 1, 1, nil) + node6.Spec.Taints = []v1.Taint{ + createPreferNoScheduleTaint("testTaint", "test", 1), + } + p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil) p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil) @@ -289,6 +294,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { excludedTaints: []string{"testTaint1=test2"}, expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value }, + { + description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.", + pods: []*v1.Pod{p2, p7, p9, p10}, + nodes: []*v1.Node{node1, node6}, + evictLocalStoragePods: false, + evictSystemCriticalPods: true, + expectedEvictedPodCount: 0, //p2 and p7 can't be evicted + nodeFit: true, + }, } for _, tc := range tests { @@ -324,6 +338,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, tc.nodes, + getPodsAssignedToNode, tc.evictLocalStoragePods, tc.evictSystemCriticalPods, false, diff --git a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go b/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go index bcdcb4d23..6664fa196 100644 --- a/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/highnodeutilization.go @@ -83,7 +83,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate "Pods", thresholds[v1.ResourcePods], } for name := range thresholds { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) } } @@ -164,7 +164,7 @@ func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds targetThresholds[v1.ResourceMemory] = MaxResourcePercentage for name := range thresholds { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { targetThresholds[name] = MaxResourcePercentage } } diff --git a/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go b/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go index 7d4c8bd60..b36fb5e29 100644 
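The node_affinity.go hunk above switches `PodFitsCurrentNode` and `PodFitsAnyNode` to the indexer-aware signatures. A minimal sketch (not part of this patch) of the eviction precondition that hunk encodes, assuming only the helper names and argument order visible in this diff:

```go
package example

import (
	v1 "k8s.io/api/core/v1"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// shouldEvictForNodeAffinity reports whether a pod no longer fits its current node
// (for example, its required node affinity is violated) but would fit on at least one
// node in the cluster once the pods already assigned to each node are accounted for
// via the getPodsAssignedToNode index.
func shouldEvictForNodeAffinity(
	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
	pod *v1.Pod,
	currentNode *v1.Node,
	nodes []*v1.Node,
) bool {
	return !nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, currentNode) &&
		nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes)
}
```

Passing the index is what lets the fit helpers weigh the requests of pods already running on each candidate node, rather than only selectors and taints.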
--- a/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go +++ b/pkg/descheduler/strategies/nodeutilization/highnodeutilization_test.go @@ -385,6 +385,50 @@ func TestHighNodeUtilization(t *testing.T) { }, expectedPodsEvicted: 0, }, + { + name: "Other node does not have enough Memory", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 30, + v1.ResourcePods: 30, + }, + nodes: []*v1.Node{ + test.BuildTestNode(n1NodeName, 4000, 200, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + }, + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) { + // A pod requesting more memory than is available on node1 + test.SetRSOwnerRef(pod) + }), + }, + expectedPodsEvicted: 0, + }, + { + name: "Other node does not have enough Memory", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 30, + v1.ResourcePods: 30, + }, + nodes: []*v1.Node{ + test.BuildTestNode(n1NodeName, 4000, 200, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + }, + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) { + // A pod requesting more memory than is available on node1 + test.SetRSOwnerRef(pod) + }), + }, + expectedPodsEvicted: 0, + }, } for _, testCase := range testCases { @@ -463,6 +507,7 @@ func TestHighNodeUtilization(t *testing.T) { nil, nil, testCase.nodes, + getPodsAssignedToNode, false, false, false, @@ -668,6 +713,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) { &item.evictionsExpected, nil, item.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go b/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go index 3ae79722d..5579a6d6e 100644 --- a/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/lownodeutilization.go @@ -111,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg "Pods", thresholds[v1.ResourcePods], } for name := range thresholds { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { keysAndValues = append(keysAndValues, string(name), int64(thresholds[name])) } } @@ -125,7 +125,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg "Pods", targetThresholds[v1.ResourcePods], } for name := range targetThresholds { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name])) } } diff --git a/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go b/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go index 6b2c9fa1f..3196825f6 100644 --- a/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go +++ b/pkg/descheduler/strategies/nodeutilization/lownodeutilization_test.go @@ -772,6 +772,7 @@ func TestLowNodeUtilization(t *testing.T) { nil, nil, test.nodes, + getPodsAssignedToNode, 
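Every updated call site in these test hunks threads a `getPodsAssignedToNode` function into `NewPodEvictor` and the fit helpers. A sketch (not part of this patch) of how that index is built from a pod informer, mirroring the setup added to the eviction tests earlier in this diff; the empty fake client is a placeholder for the nodes and pods under test:

```go
package example

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"

	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// buildNodeIndexer turns a pod informer into the GetPodsAssignedToNodeFunc that the
// descheduler now uses to look up the pods already running on a given node.
func buildNodeIndexer(ctx context.Context) (podutil.GetPodsAssignedToNodeFunc, error) {
	fakeClient := fake.NewSimpleClientset() // seed with the nodes and pods under test
	sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
	podInformer := sharedInformerFactory.Core().V1().Pods()

	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
	if err != nil {
		return nil, err
	}

	// The index is backed by the informer cache, so the factory must be started and
	// synced before the function is queried.
	sharedInformerFactory.Start(ctx.Done())
	sharedInformerFactory.WaitForCacheSync(ctx.Done())

	return getPodsAssignedToNode, nil
}
```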
false, false, false, @@ -1086,6 +1087,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) { &item.evictionsExpected, nil, item.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go index 8d29eeeaf..87f2ac34f 100644 --- a/pkg/descheduler/strategies/nodeutilization/nodeutilization.go +++ b/pkg/descheduler/strategies/nodeutilization/nodeutilization.go @@ -27,6 +27,8 @@ import ( "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" + "sigs.k8s.io/descheduler/pkg/descheduler/node" + nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" "sigs.k8s.io/descheduler/pkg/utils" ) @@ -155,7 +157,7 @@ func getNodeUsage( nodeUsageList = append(nodeUsageList, NodeUsage{ node: node, - usage: nodeUtilization(node, pods, resourceNames), + usage: nodeutil.NodeUtilization(pods, resourceNames), allPods: pods, }) } @@ -268,7 +270,7 @@ func evictPodsFromSourceNodes( "Pods", totalAvailableUsage[v1.ResourcePods].Value(), } for name := range totalAvailableUsage { - if !isBasicResource(name) { + if !node.IsBasicResource(name) { keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) } } @@ -338,7 +340,7 @@ func evictPods( "Pods", nodeInfo.usage[v1.ResourcePods].Value(), } for name := range totalAvailableUsage { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value()) } } @@ -361,7 +363,7 @@ func sortNodesByUsage(nodes []NodeInfo, ascending bool) { // extended resources for name := range nodes[i].usage { - if !isBasicResource(name) { + if !nodeutil.IsBasicResource(name) { ti = ti + nodes[i].usage[name].Value() tj = tj + nodes[j].usage[name].Value() } @@ -411,43 +413,6 @@ func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName { return resourceNames } -// isBasicResource checks if resource is basic native. -func isBasicResource(name v1.ResourceName) bool { - switch name { - case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods: - return true - default: - return false - } -} - -func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity { - totalReqs := map[v1.ResourceName]*resource.Quantity{ - v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI), - v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI), - v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI), - } - for _, name := range resourceNames { - if !isBasicResource(name) { - totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI) - } - } - - for _, pod := range pods { - req, _ := utils.PodRequestsAndLimits(pod) - for _, name := range resourceNames { - quantity, ok := req[name] - if ok && name != v1.ResourcePods { - // As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero, - // the format of the quantity will be updated to the format of y. 
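The nodeutilization.go hunk here removes the package-local `isBasicResource` and `nodeUtilization` helpers in favor of exported versions in the node package. A sketch (not part of this patch) of the replacement call pattern, assuming only the `nodeutil.NodeUtilization` and `nodeutil.IsBasicResource` names used elsewhere in this diff:

```go
package example

import (
	v1 "k8s.io/api/core/v1"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
)

// usageKeysAndValues computes the requested usage of the pods assigned to a node and
// flattens it into key/value pairs in the style the strategies log, appending extended
// (non-basic) resources such as example.com/custom-resource after CPU, memory and pods.
func usageKeysAndValues(pods []*v1.Pod, resourceNames []v1.ResourceName) []interface{} {
	usage := nodeutil.NodeUtilization(pods, resourceNames)

	keysAndValues := []interface{}{
		"CPU", usage[v1.ResourceCPU].MilliValue(),
		"Mem", usage[v1.ResourceMemory].Value(),
		"Pods", usage[v1.ResourcePods].Value(),
	}
	for name := range usage {
		if !nodeutil.IsBasicResource(name) {
			keysAndValues = append(keysAndValues, string(name), usage[name].Value())
		}
	}
	return keysAndValues
}
```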
- totalReqs[name].Add(quantity) - } - } - } - - return totalReqs -} - func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) { var nonRemovablePods, removablePods []*v1.Pod @@ -472,7 +437,7 @@ func averageNodeBasicresources(nodes []*v1.Node, getPodsAssignedToNode podutil.G numberOfNodes-- continue } - usage := nodeUtilization(node, pods, resourceNames) + usage := nodeutil.NodeUtilization(pods, resourceNames) nodeCapacity := node.Status.Capacity if len(node.Status.Allocatable) > 0 { nodeCapacity = node.Status.Allocatable diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go index df5f33057..ff8fc429f 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity_test.go +++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go @@ -47,6 +47,7 @@ func TestPodAntiAffinity(t *testing.T) { Unschedulable: true, } }) + node4 := test.BuildTestNode("n4", 2, 2, 1, nil) p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil) @@ -174,6 +175,14 @@ func TestPodAntiAffinity(t *testing.T) { nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, }, + { + description: "Won't evict pods because only other node doesn't have enough resources", + maxPodsToEvictPerNode: &uint3, + pods: []*v1.Pod{p1, p2, p3, p4}, + nodes: []*v1.Node{node1, node4}, + expectedEvictedPodCount: 0, + nodeFit: true, + }, } for _, test := range tests { @@ -209,6 +218,7 @@ func TestPodAntiAffinity(t *testing.T) { test.maxPodsToEvictPerNode, test.maxNoOfPodsToEvictPerNamespace, test.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go index 697250cba..8c08cab62 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -298,6 +298,7 @@ func TestPodLifeTime(t *testing.T) { nil, nil, tc.nodes, + getPodsAssignedToNode, false, false, tc.ignorePvcPods, diff --git a/pkg/descheduler/strategies/toomanyrestarts_test.go b/pkg/descheduler/strategies/toomanyrestarts_test.go index aa7bec2e7..8a1ba94f3 100644 --- a/pkg/descheduler/strategies/toomanyrestarts_test.go +++ b/pkg/descheduler/strategies/toomanyrestarts_test.go @@ -97,8 +97,10 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { Unschedulable: true, } }) + node4 := test.BuildTestNode("node4", 200, 3000, 10, nil) + node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil) - pods := initPods(node1) + pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil)) createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy { return api.DeschedulerStrategy{ @@ -199,6 +201,20 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { expectedEvictedPodCount: 0, maxPodsToEvictPerNode: &uint3, }, + { + description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node does not have enough CPU, 0 pod evictions", + strategy: createStrategy(true, true, 1, true), + nodes: []*v1.Node{node1, node4}, + expectedEvictedPodCount: 0, + maxPodsToEvictPerNode: &uint3, + }, + { + description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node has enough CPU, 3 pod evictions", + strategy: createStrategy(true, true, 1, true), + nodes: 
[]*v1.Node{node1, node5}, + expectedEvictedPodCount: 3, + maxPodsToEvictPerNode: &uint3, + }, } for _, tc := range tests { @@ -234,6 +250,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { tc.maxPodsToEvictPerNode, tc.maxNoOfPodsToEvictPerNamespace, tc.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index 2f0fda87a..035896bbc 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -18,20 +18,18 @@ package strategies import ( "context" - "fmt" "math" "sort" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - utilerrors "k8s.io/apimachinery/pkg/util/errors" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" + "sigs.k8s.io/descheduler/pkg/descheduler/node" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" "sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation" "sigs.k8s.io/descheduler/pkg/utils" @@ -170,7 +168,7 @@ func RemovePodsViolatingTopologySpreadConstraint( klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint) continue } - balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodeMap) + balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes) } } @@ -225,12 +223,14 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol // [5, 5, 5, 5, 5, 5] // (assuming even distribution by the scheduler of the evicted pods) func balanceDomains( + client clientset.Interface, + getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, podsForEviction map[*v1.Pod]struct{}, constraint v1.TopologySpreadConstraint, constraintTopologies map[topologyPair][]*v1.Pod, sumPods float64, - isEvictable func(*v1.Pod) bool, - nodeMap map[string]*v1.Node) { + isEvictable func(pod *v1.Pod) bool, + nodes []*v1.Node) { idealAvg := sumPods / float64(len(constraintTopologies)) sortedDomains := sortDomains(constraintTopologies, isEvictable) @@ -273,8 +273,19 @@ func balanceDomains( // also (just for tracking), add them to the list of pods in the lower topology aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:] for k := range aboveToEvict { - if err := validatePodFitsOnOtherNodes(aboveToEvict[k], nodeMap); err != nil { - klog.V(2).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(aboveToEvict[k])) + // PodFitsAnyOtherNode excludes the current node because, for the sake of domain balancing only, we care about if there is any other + // place it could theoretically fit. 
+ // If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading + // Also, if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node, + // don't bother evicting it as it will just end up back on the same node + // however we still account for it "being evicted" so the algorithm can complete + // TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod + // but still try to get the required # of movePods (instead of just chopping that value off the slice above). + // In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes. + // This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm. + // So, a better selection heuristic could improve performance. + if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) { + klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k])) continue } @@ -285,56 +296,6 @@ func balanceDomains( } } -// validatePodFitsOnOtherNodes performs validation based on scheduling predicates for affinity and toleration. -// It excludes the current node because, for the sake of domain balancing only, we care about if there is any other -// place it could theoretically fit. -// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading -func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error { - // if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node, - // don't bother evicting it as it will just end up back on the same node - // however we still account for it "being evicted" so the algorithm can complete - // TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod - // but still try to get the required # of movePods (instead of just chopping that value off the slice above) - isRequiredDuringSchedulingIgnoredDuringExecution := pod.Spec.Affinity != nil && - pod.Spec.Affinity.NodeAffinity != nil && - pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil - - hardTaintsFilter := func(taint *v1.Taint) bool { - return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute - } - - var eligibleNodesCount, ineligibleAffinityNodesCount, ineligibleTaintedNodesCount int - for _, node := range nodeMap { - if node == nodeMap[pod.Spec.NodeName] { - continue - } - if pod.Spec.NodeSelector != nil || isRequiredDuringSchedulingIgnoredDuringExecution { - if !nodeutil.PodFitsCurrentNode(pod, node) { - ineligibleAffinityNodesCount++ - continue - } - } - if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, hardTaintsFilter) { - ineligibleTaintedNodesCount++ - continue - } - - eligibleNodesCount++ - } - - if eligibleNodesCount == 0 { - var errs []error - if ineligibleAffinityNodesCount > 0 { - errs = append(errs, fmt.Errorf("%d nodes with ineligible selector/affinity", ineligibleAffinityNodesCount)) - } - if ineligibleTaintedNodesCount > 0 { - errs = append(errs, fmt.Errorf("%d nodes with taints that are not tolerated", ineligibleTaintedNodesCount)) - } - return utilerrors.NewAggregate(errs) - } - return nil -} - // sortDomains sorts and splits the list of topology domains based on their size // it 
also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order: // 1. non-evictable pods @@ -342,7 +303,7 @@ func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error // 3. pods in descending priority // 4. all other pods // We then pop pods off the back of the list for eviction -func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(*v1.Pod) bool) []topology { +func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology { sortedTopologies := make([]topology, 0, len(constraintTopologyPairs)) // sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted) for pair, list := range constraintTopologyPairs { diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index 22a863651..f2b8ac8fa 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -483,6 +483,38 @@ func TestTopologySpreadConstraint(t *testing.T) { }, namespaces: []string{"ns1"}, }, + { + name: "2 domains size [2 6], maxSkew=2, can't move any because node1 does not have enough CPU", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 200, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(2), + }, + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + }, + { + count: 6, + node: "n2", + labels: map[string]string{"foo": "bar"}, + }, + }), + expectedEvictedCount: 0, + strategy: api.DeschedulerStrategy{ + Params: &api.StrategyParameters{ + NodeFit: true, + }, + }, + namespaces: []string{"ns1"}, + }, { // see https://github.com/kubernetes-sigs/descheduler/issues/564 name: "Multiple constraints (6 nodes/2 zones, 4 pods)", @@ -686,7 +718,7 @@ func TestTopologySpreadConstraint(t *testing.T) { namespaces: []string{"ns1"}, }, { - name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node", + name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node", nodes: []*v1.Node{ test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { @@ -718,6 +750,43 @@ func TestTopologySpreadConstraint(t *testing.T) { strategy: api.DeschedulerStrategy{}, namespaces: []string{"ns1"}, }, + { + name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node, and NodeFit is enabled", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneB" + n.Spec.Taints = []v1.Taint{ + { + Key: "taint-test", + Value: "test", + Effect: v1.TaintEffectNoSchedule, + }, + } + }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 1, + node: "n1", + labels: map[string]string{"foo": "bar"}, + nodeSelector: map[string]string{"zone": "zoneA"}, + }, + }), + 
expectedEvictedCount: 0, + strategy: api.DeschedulerStrategy{ + Params: &api.StrategyParameters{ + NodeFit: true, + }, + }, + namespaces: []string{"ns1"}, + }, { name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint", nodes: []*v1.Node{ @@ -902,6 +971,7 @@ func TestTopologySpreadConstraint(t *testing.T) { nil, nil, tc.nodes, + getPodsAssignedToNode, false, false, false, diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go index 245d7bee2..2c580206f 100644 --- a/pkg/utils/pod.go +++ b/pkg/utils/pod.go @@ -6,25 +6,9 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilfeature "k8s.io/apiserver/pkg/util/feature" - "k8s.io/component-base/featuregate" "k8s.io/klog/v2" ) -const ( - // owner: @jinxu - // beta: v1.10 - // - // New local storage types to support local storage capacity isolation - LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation" - - // owner: @egernst - // alpha: v1.16 - // - // Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass - PodOverhead featuregate.Feature = "PodOverhead" -) - // GetResourceRequest finds and returns the request value for a specific resource. func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 { if resource == v1.ResourcePods { @@ -53,11 +37,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou requestQuantity = resource.Quantity{Format: resource.DecimalSI} } - if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(LocalStorageCapacityIsolation) { - // if the local storage capacity isolation feature gate is disabled, pods request 0 disk - return requestQuantity - } - for _, container := range pod.Spec.Containers { if rQuantity, ok := container.Resources.Requests[resourceName]; ok { requestQuantity.Add(rQuantity) @@ -72,9 +51,9 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou } } - // if PodOverhead feature is supported, add overhead for running a pod - // to the total requests if the resource total is non-zero - if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) { + // We assume pod overhead feature gate is enabled. + // We can't import the scheduler settings so we will inherit the default. + if pod.Spec.Overhead != nil { if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() { requestQuantity.Add(podOverhead) } @@ -162,9 +141,9 @@ func PodRequestsAndLimits(pod *v1.Pod) (reqs, limits v1.ResourceList) { maxResourceList(limits, container.Resources.Limits) } - // if PodOverhead feature is supported, add overhead for running a pod - // to the sum of reqeuests and to non-zero limits: - if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) { + // We assume pod overhead feature gate is enabled. + // We can't import the scheduler settings so we will inherit the default. 
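The pkg/utils/pod.go hunk here drops the LocalStorageCapacityIsolation and PodOverhead feature-gate checks, so pod overhead is now always folded into the computed requests. A small worked example (not part of this patch, values made up) showing the effect through `PodRequestsAndLimits`:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	// A pod requesting 2 CPU cores whose RuntimeClass adds a 1-core overhead.
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
					},
				},
			}},
			Overhead: v1.ResourceList{
				v1.ResourceCPU: *resource.NewMilliQuantity(1000, resource.DecimalSI),
			},
		},
	}

	reqs, _ := utils.PodRequestsAndLimits(pod)
	cpu := reqs[v1.ResourceCPU]
	// With the feature-gate check removed, the overhead is always added: 3000, not 2000.
	fmt.Println(cpu.MilliValue())
}
```

This matters for node fit: a pod's effective CPU and memory footprint on a candidate node now includes its RuntimeClass overhead by default.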
+ if pod.Spec.Overhead != nil { addResourceList(reqs, pod.Spec.Overhead) for name, quantity := range pod.Spec.Overhead { diff --git a/test/e2e/e2e_duplicatepods_test.go b/test/e2e/e2e_duplicatepods_test.go index e95504055..961636023 100644 --- a/test/e2e/e2e_duplicatepods_test.go +++ b/test/e2e/e2e_duplicatepods_test.go @@ -144,6 +144,7 @@ func TestRemoveDuplicates(t *testing.T) { nil, nil, nodes, + getPodsAssignedToNode, true, false, false, diff --git a/test/e2e/e2e_failedpods_test.go b/test/e2e/e2e_failedpods_test.go index 95e55ee85..b033ce28a 100644 --- a/test/e2e/e2e_failedpods_test.go +++ b/test/e2e/e2e_failedpods_test.go @@ -83,7 +83,7 @@ func TestFailedPods(t *testing.T) { defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) - podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes) t.Logf("Running RemoveFailedPods strategy for %s", name) strategies.RemoveFailedPods( diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 881ab789f..88a945ec3 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -39,7 +39,6 @@ import ( v1qos "k8s.io/kubectl/pkg/util/qos" "sigs.k8s.io/descheduler/cmd/descheduler/app/options" - "sigs.k8s.io/descheduler/pkg/api" deschedulerapi "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler" "sigs.k8s.io/descheduler/pkg/descheduler/client" @@ -199,6 +198,7 @@ func runPodLifetimeStrategy( nil, nil, nodes, + getPodsAssignedToNode, false, evictCritical, false, @@ -324,7 +324,7 @@ func TestLowNodeUtilization(t *testing.T) { waitForRCPodsRunning(ctx, t, clientSet, rc) // Run LowNodeUtilization strategy - podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes) podFilter, err := podutil.NewOptions().WithFilter(podEvictor.Evictable().IsEvictable).BuildFilterFunc() if err != nil { @@ -886,17 +886,6 @@ func TestEvictAnnotation(t *testing.T) { clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t) defer close(stopCh) - nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - if err != nil { - t.Errorf("Error listing node with %v", err) - } - - var nodes []*v1.Node - for i := range nodeList.Items { - node := nodeList.Items[i] - nodes = append(nodes, &node) - } - testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { t.Fatalf("Unable to create ns %v", testNamespace.Name) @@ -977,7 +966,7 @@ func TestDeschedulingInterval(t *testing.T) { } s.Client = clientSet - deschedulerPolicy := &api.DeschedulerPolicy{} + deschedulerPolicy := &deschedulerapi.DeschedulerPolicy{} c := make(chan bool, 1) go func() { @@ -1349,7 +1338,7 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) { return allNodes, workerNodes } -func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor { +func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, nodes []*v1.Node) *evictions.PodEvictor { evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) if err != nil || len(evictionPolicyGroupVersion) == 0 { t.Fatalf("Error creating eviction policy group: 
%v", err) @@ -1361,6 +1350,7 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []* nil, nil, nodes, + getPodsAssignedToNode, true, false, false, diff --git a/test/e2e/e2e_toomanyrestarts_test.go b/test/e2e/e2e_toomanyrestarts_test.go index 8635caffd..8eccd60a4 100644 --- a/test/e2e/e2e_toomanyrestarts_test.go +++ b/test/e2e/e2e_toomanyrestarts_test.go @@ -137,6 +137,7 @@ func TestTooManyRestarts(t *testing.T) { nil, nil, nodes, + getPodsAssignedToNode, true, false, false, diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index 55bfac50e..2766eaafa 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -77,7 +77,7 @@ func TestTopologySpreadConstraint(t *testing.T) { defer deleteRC(ctx, t, clientSet, violatorRc) waitForRCPodsRunning(ctx, t, clientSet, violatorRc) - podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes) // Run TopologySpreadConstraint strategy t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)
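Taken together, these changes make the nodeFit check request-aware end to end. A self-contained sketch (not part of this patch) exercising the new `PodFitsAnyOtherNode` signature against the repo's test helpers; the import path for the test package and the expected result are assumptions based on the hunks above:

```go
package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"

	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/test"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// n1 hosts the candidate pod; n2 is the only other node and has 1 CPU core,
	// 800m of which is already requested by the "existing" pod.
	n1 := test.BuildTestNode("n1", 2000, 8*1000*1000*1000, 10, nil)
	n2 := test.BuildTestNode("n2", 1000, 8*1000*1000*1000, 10, nil)
	candidate := test.BuildTestPod("candidate", 500, 0, n1.Name, nil)
	existing := test.BuildTestPod("existing", 800, 0, n2.Name, nil)

	client := fake.NewSimpleClientset(n1, n2, candidate, existing)
	factory := informers.NewSharedInformerFactory(client, 0)
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(factory.Core().V1().Pods())
	if err != nil {
		panic(err)
	}
	factory.Start(ctx.Done())
	factory.WaitForCacheSync(ctx.Done())

	// Expected to print false: 500m does not fit next to the existing 800m request
	// on the 1-core n2, and n1 (the pod's own node) is excluded from the check.
	fmt.Println(nodeutil.PodFitsAnyOtherNode(getPodsAssignedToNode, candidate, []*v1.Node{n1, n2}))
}
```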