mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

NodeFit parameter now considers pod requests

This commit is contained in:
RyanDevlin
2022-01-12 11:49:01 -05:00
committed by Jan Chaloupka
parent eac3b4b54a
commit 16eb9063b6
27 changed files with 1497 additions and 648 deletions

View File

@@ -739,6 +739,7 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize
- A `nodeSelector` on the pod
- Any `Tolerations` on the pod and any `Taints` on the other nodes
- `nodeAffinity` on the pod
- Resource `Requests` made by the pod and the resources available on other nodes
- Whether any of the other nodes are marked as `unschedulable`
E.g.
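For illustration only (the README's own yaml example is not reproduced here), a minimal Go sketch of what enabling `nodeFit` means at eviction time; `canEvict` is a hypothetical helper, while `Evictable`, `WithNodeFit`, and `IsEvictable` come from the evictions diff below:

package example

import (
	v1 "k8s.io/api/core/v1"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

// canEvict reports whether a pod passes the evictability checks when a strategy
// sets nodeFit: true. With this commit, those checks also verify that some other
// node has enough unrequested capacity to hold the pod's resource requests.
func canEvict(podEvictor *evictions.PodEvictor, pod *v1.Pod) bool {
	evictable := podEvictor.Evictable(evictions.WithNodeFit(true))
	return evictable.IsEvictable(pod)
}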

View File

@@ -283,6 +283,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
nodes,
getPodsAssignedToNode,
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
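A sketch of where the new `getPodsAssignedToNode` argument comes from: the descheduler builds it from the shared pod informer with `podutil.BuildGetPodsAssignedToNodeFunc`, the same pattern used in the test diffs below; `buildNodeIndexer` itself is a hypothetical helper, and the client and stop channel are assumed to exist:

package example

import (
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"

	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

// buildNodeIndexer constructs the GetPodsAssignedToNodeFunc that NewPodEvictor
// now receives, so that NodeFit can cheaply list the pods already on each node.
func buildNodeIndexer(client kubernetes.Interface, stopCh <-chan struct{}) (podutil.GetPodsAssignedToNodeFunc, error) {
	factory := informers.NewSharedInformerFactory(client, 0)
	podInformer := factory.Core().V1().Pods()
	getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
	if err != nil {
		return nil, err
	}
	// Start the informer and wait for its cache before handing out the indexer.
	factory.Start(stopCh)
	factory.WaitForCacheSync(stopCh)
	return getPodsAssignedToNode, nil
}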

View File

@@ -51,6 +51,7 @@ type namespacePodEvictCount map[string]uint
type PodEvictor struct {
client clientset.Interface
nodes []*v1.Node
nodeIndexer podutil.GetPodsAssignedToNodeFunc
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
@@ -71,6 +72,7 @@ func NewPodEvictor(
maxPodsToEvictPerNode *uint,
maxPodsToEvictPerNamespace *uint,
nodes []*v1.Node,
nodeIndexer podutil.GetPodsAssignedToNodeFunc,
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
@@ -87,6 +89,7 @@ func NewPodEvictor(
return &PodEvictor{
client: client,
nodes: nodes,
nodeIndexer: nodeIndexer,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
@@ -296,7 +299,7 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
}
if options.nodeFit {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !nodeutil.PodFitsAnyOtherNode(pod, pe.nodes) {
if !nodeutil.PodFitsAnyOtherNode(pe.nodeIndexer, pod, pe.nodes) {
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
}
return nil

View File

@@ -21,8 +21,10 @@ import (
"testing"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
@@ -80,9 +82,9 @@ func TestIsEvictable(t *testing.T) {
nodeLabelKey := "datacenter"
nodeLabelValue := "east"
type testCase struct {
pod *v1.Pod
description string
pods []*v1.Pod
nodes []*v1.Node
runBefore func(*v1.Pod, []*v1.Node)
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
@@ -92,261 +94,309 @@ func TestIsEvictable(t *testing.T) {
}
testCases := []testCase{
{ // Failed pod eviction with no ownerRefs.
pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
{
description: "Failed pod eviction with no ownerRefs",
pods: []*v1.Pod{
test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Status.Phase = v1.PodFailed
}),
},
evictFailedBarePods: false,
result: false,
}, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled
pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
},
}, {
description: "Normal pod eviction with no ownerRefs and evictFailedBarePods enabled",
pods: []*v1.Pod{test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil)},
evictFailedBarePods: true,
result: false,
}, { // Failed pod eviction with no ownerRefs
pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
}, {
description: "Failed pod eviction with no ownerRefs",
pods: []*v1.Pod{
test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Status.Phase = v1.PodFailed
}),
},
evictFailedBarePods: true,
result: true,
}, { // Normal pod eviction with normal ownerRefs
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}, {
description: "Normal pod eviction with normal ownerRefs",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}, {
description: "Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
}, {
description: "Normal pod eviction with replicaSet ownerRefs",
pods: []*v1.Pod{
test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
}, {
description: "Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p4", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs
pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
}, {
description: "Normal pod eviction with statefulSet ownerRefs",
pods: []*v1.Pod{
test.BuildTestPod("p18", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
}, {
description: "Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p19", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
}, {
description: "Pod not evicted because it is bound to a PV and evictLocalStoragePods = false",
pods: []*v1.Pod{
test.BuildTestPod("p5", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
},
}
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
}, {
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = true",
pods: []*v1.Pod{
test.BuildTestPod("p6", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
},
}
}
}),
},
evictLocalStoragePods: true,
evictSystemCriticalPods: false,
result: true,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
}, {
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
},
}
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it is part of a daemonSet
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}, {
description: "Pod not evicted becasuse it is part of a daemonSet",
pods: []*v1.Pod{
test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}, {
description: "Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it is a mirror pod
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
}, {
description: "Pod not evicted becasuse it is a mirror poddsa",
pods: []*v1.Pod{
test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
}, {
description: "Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it has system critical priority
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}, {
description: "Pod not evicted becasuse it has system critical priority",
pods: []*v1.Pod{
test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
}, {
description: "Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it has a priority higher than the configured priority threshold
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
}, {
description: "Pod not evicted becasuse it has a priority higher than the configured priority threshold",
pods: []*v1.Pod{
test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: false,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}, {
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true",
pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}, {
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod with no tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
}, {
description: "Pod with no tolerations running on normal node, all other nodes tainted",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
@@ -354,27 +404,8 @@ func TestIsEvictable(t *testing.T) {
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.Tolerations = []v1.Toleration{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
@@ -382,81 +413,259 @@ func TestIsEvictable(t *testing.T) {
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, { // Pod with incorrect node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: "fail",
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}, {
description: "Pod with correct tolerations running on normal node, all other nodes tainted",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Tolerations = []v1.Toleration{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, {
description: "Pod with incorrect node selector",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: "fail",
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, {
description: "Pod with correct node selector",
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, {
description: "Pod with correct node selector, but only available node doesn't have enough CPU",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2-TEST", 10, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3-TEST", 10, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, {
description: "Pod with correct node selector, and one node has enough memory",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestPod("node2-pod-10GB-mem", 20, 10, "node2", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
test.BuildTestPod("node3-pod-10GB-mem", 20, 10, "node3", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 100, 20, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, {
description: "Pod with correct node selector, but both nodes don't have enough memory",
pods: []*v1.Pod{
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestPod("node2-pod-10GB-mem", 10, 10, "node2", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
test.BuildTestPod("node3-pod-10GB-mem", 10, 10, "node3", func(pod *v1.Pod) {
pod.ObjectMeta.Labels = map[string]string{
"test": "true",
}
}),
},
nodes: []*v1.Node{
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
test.BuildTestNode("node3", 100, 16, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
},
}
for _, test := range testCases {
	test.runBefore(test.pod, test.nodes)
	nodes := append(test.nodes, n1)
	podEvictor := &PodEvictor{
		evictLocalStoragePods:   test.evictLocalStoragePods,
		evictSystemCriticalPods: test.evictSystemCriticalPods,
		evictFailedBarePods:     test.evictFailedBarePods,
		nodes:                   nodes,
	}
	evictable := podEvictor.Evictable()
	var opts []func(opts *Options)
	if test.priorityThreshold != nil {
		opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
	}
	if test.nodeFit {
		opts = append(opts, WithNodeFit(true))
	}
	evictable = podEvictor.Evictable(opts...)
	result := evictable.IsEvictable(test.pod)
	if result != test.result {
		t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
	}
}
for _, test := range testCases {
	t.Run(test.description, func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()
		nodes := append(test.nodes, n1)
		var objs []runtime.Object
		for _, node := range test.nodes {
			objs = append(objs, node)
		}
		for _, pod := range test.pods {
			objs = append(objs, pod)
		}
		fakeClient := fake.NewSimpleClientset(objs...)
		sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
		podInformer := sharedInformerFactory.Core().V1().Pods()
		getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
		if err != nil {
			t.Errorf("Build get pods assigned to node function error: %v", err)
		}
		sharedInformerFactory.Start(ctx.Done())
		sharedInformerFactory.WaitForCacheSync(ctx.Done())
		podEvictor := &PodEvictor{
			client:                     fakeClient,
			nodes:                      nodes,
			nodeIndexer:                getPodsAssignedToNode,
			policyGroupVersion:         policyv1.SchemeGroupVersion.String(),
			dryRun:                     false,
			maxPodsToEvictPerNode:      nil,
			maxPodsToEvictPerNamespace: nil,
			evictLocalStoragePods:      test.evictLocalStoragePods,
			evictSystemCriticalPods:    test.evictSystemCriticalPods,
			evictFailedBarePods:        test.evictFailedBarePods,
		}
		var opts []func(opts *Options)
		if test.priorityThreshold != nil {
			opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
		}
		if test.nodeFit {
			opts = append(opts, WithNodeFit(true))
		}
		evictable := podEvictor.Evictable(opts...)
		result := evictable.IsEvictable(test.pods[0])
		if result != test.result {
			t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pods[0].Name, test.result, result)
		}
	})
}
}
func TestPodTypes(t *testing.T) {

View File

@@ -18,13 +18,16 @@ package node
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
@@ -96,32 +99,91 @@ func IsReady(node *v1.Node) bool {
return true
}
// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node
// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector
// matching the node label (including affinity), the taints on the node, and the node being schedulable or not.
func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool {
	for _, node := range nodes {
		// Skip node pod is already on
		if node.Name == pod.Spec.NodeName {
			continue
		}
		// Check node selector and required affinity
		ok, err := utils.PodMatchNodeSelector(pod, node)
		if err != nil || !ok {
			continue
		}
		// Check taints (we only care about NoSchedule and NoExecute taints)
		ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
			return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
		})
		if !ok {
			continue
		}
		// Check if node is schedulable
		if !IsNodeUnschedulable(node) {
			klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
			return true
		}
	}
	return false
}
// NodeFit returns true if the provided pod can probably be scheduled onto the provided node.
// This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
// This function currently considers a subset of the Kubernetes Scheduler's predicates when
// deciding if a pod would fit on a node, but more predicates may be added in the future.
func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
	// Check node selector and required affinity
	var errors []error
	ok, err := utils.PodMatchNodeSelector(pod, node)
	if err != nil {
		errors = append(errors, err)
	} else if !ok {
		errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
	}
	// Check taints (we only care about NoSchedule and NoExecute taints)
	ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
		return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
	})
	if !ok {
		errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
	}
	// Check if the pod can fit on a node based on its requests
	ok, reqErrors := FitsRequest(nodeIndexer, pod, node)
	if !ok {
		errors = append(errors, reqErrors...)
	}
	// Check if node is schedulable
	if IsNodeUnschedulable(node) {
		errors = append(errors, fmt.Errorf("node is not schedulable"))
	}
	return errors
}
// PodFitsAnyOtherNode checks if the given pod will probably fit any of the given nodes, besides the node
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
	for _, node := range nodes {
		// Skip node pod is already on
		if node.Name == pod.Spec.NodeName {
			continue
		}
		errors := NodeFit(nodeIndexer, pod, node)
		if len(errors) == 0 {
			klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
			return true
		} else {
			klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
			for _, err := range errors {
				klog.V(4).InfoS(err.Error())
			}
		}
	}
	return false
}
// PodFitsAnyNode checks if the given pod will probably fit any of the given nodes. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
}
return false
}
// PodFitsCurrentNode checks if the given pod will probably fit onto the given node. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
return false
@@ -133,39 +195,102 @@ func IsNodeUnschedulable(node *v1.Node) bool {
return node.Spec.Unschedulable
}
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on
// multiple criteria, like, pod node selector matching the node label, node
// being schedulable or not.
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
	for _, node := range nodes {
		ok, err := utils.PodMatchNodeSelector(pod, node)
		if err != nil || !ok {
			continue
		}
		if !IsNodeUnschedulable(node) {
			klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
			return true
		}
	}
	return false
}
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
// node selector matches the node label.
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
	ok, err := utils.PodMatchNodeSelector(pod, node)
	if err != nil {
		klog.ErrorS(err, "Failed to match node selector")
		return false
	}
	if !ok {
		klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
		return false
	}
	klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
	return true
}
// FitsRequest determines if a pod can fit on a node based on that pod's requests. It returns true if
// the pod will fit.
func FitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
	var insufficientResources []error
	// Get pod requests
	podRequests, _ := utils.PodRequestsAndLimits(pod)
	resourceNames := make([]v1.ResourceName, 0, len(podRequests))
	for name := range podRequests {
		resourceNames = append(resourceNames, name)
	}
	availableResources, err := NodeAvailableResources(nodeIndexer, node, resourceNames)
	if err != nil {
		return false, []error{err}
	}
	podFitsOnNode := true
	for _, resource := range resourceNames {
		podResourceRequest := podRequests[resource]
		var requestTooLarge bool
		switch resource {
		case v1.ResourceCPU:
			requestTooLarge = podResourceRequest.MilliValue() > availableResources[resource].MilliValue()
		default:
			requestTooLarge = podResourceRequest.Value() > availableResources[resource].Value()
		}
		if requestTooLarge {
			insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
			podFitsOnNode = false
		}
	}
	return podFitsOnNode, insufficientResources
}
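As a worked example of the CPU branch above, a standalone sketch (the numbers mirror the "CPU requests are too big" test case further down: a 4000m node already running a 3000m pod leaves 1000m available, so a 2000m request is rejected):

package example

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	allocatable := resource.NewMilliQuantity(4000, resource.DecimalSI) // node's allocatable CPU
	aggregated := resource.NewMilliQuantity(3000, resource.DecimalSI)  // requests of pods already on the node
	request := resource.NewMilliQuantity(2000, resource.DecimalSI)     // incoming pod's CPU request

	// The same arithmetic FitsRequest performs via NodeAvailableResources.
	available := resource.NewMilliQuantity(allocatable.MilliValue()-aggregated.MilliValue(), resource.DecimalSI)
	if request.MilliValue() > available.MilliValue() {
		fmt.Println("insufficient cpu") // 2000m > 1000m
	}
}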
// NodeAvailableResources returns resources mapped to the quantity available on the node.
func NodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
if err != nil {
return nil, err
}
aggregatePodRequests := AggregatePodRequests(podsOnNode, resourceNames)
return nodeRemainingResources(node, aggregatePodRequests, resourceNames), nil
}
// AggregatePodRequests returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
func AggregatePodRequests(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
func nodeRemainingResources(node *v1.Node, aggregatePodRequests map[v1.ResourceName]*resource.Quantity, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
remainingResources := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-aggregatePodRequests[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-aggregatePodRequests[v1.ResourceMemory].Value(), resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-aggregatePodRequests[v1.ResourcePods].Value(), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
allocatableResource := node.Status.Allocatable[name]
remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-aggregatePodRequests[name].Value(), resource.DecimalSI)
}
}
return remainingResources
}
// IsBasicResource checks if resource is basic native.
func IsBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
return false
}
}
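To make the aggregation step concrete, a standalone sketch of summing container requests across pods with `Quantity.Add`, as `AggregatePodRequests` does through `utils.PodRequestsAndLimits`; `podRequestingCPU` is a hand-built stand-in for the repo's test helpers:

package example

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func podRequestingCPU(milli int64) *v1.Pod {
	return &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
		Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
			v1.ResourceCPU: *resource.NewMilliQuantity(milli, resource.DecimalSI),
		}},
	}}}}
}

func main() {
	total := resource.NewMilliQuantity(0, resource.DecimalSI)
	for _, pod := range []*v1.Pod{podRequestingCPU(3000), podRequestingCPU(2000)} {
		for _, container := range pod.Spec.Containers {
			cpu := container.Resources.Requests[v1.ResourceCPU]
			// Quantity.Add accumulates; with a zero current value it adopts cpu's format.
			total.Add(cpu)
		}
	}
	fmt.Println(total.String()) // "5", i.e. 5000m of CPU requested in total
}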

View File

@@ -21,9 +21,13 @@ import (
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
@@ -147,13 +151,13 @@ func TestPodFitsCurrentNode(t *testing.T) {
},
},
},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
success: true,
},
{
@@ -181,27 +185,53 @@ func TestPodFitsCurrentNode(t *testing.T) {
},
},
},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
success: false,
},
}
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: nil}, nil
})
for _, tc := range tests {
actual := PodFitsCurrentNode(tc.pod, tc.node)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
objs = append(objs, tc.node)
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
})
}
}
func TestPodFitsAnyOtherNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeTaintKey := "hardware"
@@ -215,238 +245,527 @@ func TestPodFitsAnyOtherNode(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
success bool
podsOnNodes []*v1.Pod
}{
{
description: "Pod fits another node matching node affinity",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
success: true,
podsOnNodes: []*v1.Pod{},
success: true,
},
{
description: "Pod expected to fit one of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
success: true,
podsOnNodes: []*v1.Pod{},
success: true,
},
{
description: "Pod expected to fit none of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "unfit1",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "unfit2",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "unfit1",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "unfit2",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
success: false,
podsOnNodes: []*v1.Pod{},
success: false,
},
{
description: "Nodes are unschedulable but labels match, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Unschedulable: true,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Unschedulable = true
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
success: false,
podsOnNodes: []*v1.Pod{},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, should pass",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
description: "Both nodes are tained, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, there is a pod on the available node, and requests are low, should pass",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI)
}),
},
success: true,
},
{
description: "Both nodes are tained, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores
test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
				}
			}),
// Notice that this node has only 20GB of ephemeral storage; the 11GB pod defined below already runs here, and the pod above requests 10GB
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
// Notice that this node has only 15 units of the custom resource; the pod defined below already requests 10 of them, and the pod above requests 10
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node has 5 CPU cores; the pod defined below requests 2 cores plus 1 core of pod overhead, and the pod above requests 1 core
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
}),
},
success: true,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node has only 5 CPU cores; the pod defined below requests 2 cores plus 2 cores of pod overhead, and the pod above requests 2 cores
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000)
}),
},
success: false,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.podsOnNodes {
objs = append(objs, pod)
}
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
})
}
}
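
// createPodManifest returns a pod bound to nodeName whose required node
// affinity only matches nodes labeled nodeSelectorKey=nodeSelectorValue.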
func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod {
	return &v1.Pod{
Spec: v1.PodSpec{
NodeName: nodeName,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{
nodeSelectorValue,
},
},
},
},
},
},
},
},
},
	}
}

// createResourceList builds a small resource list of core resources
func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList {
resourceList := make(map[v1.ResourceName]resource.Quantity)
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI)
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI)
resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
return resourceList
}
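
For orientation, the resource cases above all exercise the same arithmetic: a pod fits a node when, for every resource it requests, the node's allocatable quantity minus the requests of the pods already assigned there still covers the candidate's request. The helper below is a hypothetical, self-contained sketch of that check, not a function from this repository:

package fitsketch

import (
	v1 "k8s.io/api/core/v1"
)

// fitsResources reports whether candidate's requests fit alongside the
// resident requests within the node's allocatable resources. A resource
// missing from allocatable is treated as zero capacity.
func fitsResources(candidate, resident, allocatable v1.ResourceList) bool {
	for name, want := range candidate {
		avail := allocatable[name].DeepCopy()
		if used, ok := resident[name]; ok {
			avail.Sub(used)
		}
		if avail.Cmp(want) < 0 {
			return false
		}
	}
	return true
}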

View File

@@ -67,6 +67,7 @@ func TestFindDuplicatePods(t *testing.T) {
Unschedulable: true,
}
})
node6 := test.BuildTestNode("n6", 200, 200, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p1.Namespace = "dev"
@@ -102,6 +103,14 @@ func TestFindDuplicatePods(t *testing.T) {
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
p18.Namespace = "node-fit"
// This pod sits on node6 and is used to take up CPU requests on the node
p19 := test.BuildTestPod("CPU-eater", 150, 150, node6.Name, nil)
p19.Namespace = "test"
// Dummy pod for node6 used to do the opposite of p19
p20 := test.BuildTestPod("CPU-saver", 100, 150, node6.Name, nil)
p20.Namespace = "test"
// ### Evictable Pods ###
// Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted.
@@ -263,6 +272,20 @@ func TestFindDuplicatePods(t *testing.T) {
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available does not have enough CPU, and nodeFit set to true. 0 should be evicted.",
pods: []*v1.Pod{p1, p2, p3, p19},
nodes: []*v1.Node{node1, node6},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available has enough CPU, and nodeFit set to true. 1 should be evicted.",
pods: []*v1.Pod{p1, p2, p3, p20},
nodes: []*v1.Node{node1, node6},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
}
for _, testCase := range testCases {
@@ -297,6 +320,7 @@ func TestFindDuplicatePods(t *testing.T) {
nil,
nil,
testCase.nodes,
getPodsAssignedToNode,
false,
false,
false,
@@ -724,6 +748,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
nil,
nil,
testCase.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -166,9 +166,12 @@ func TestRemoveFailedPods(t *testing.T) {
{
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) {
node.Spec.Unschedulable = true
})},
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
node.Spec.Unschedulable = true
}),
},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
@@ -176,6 +179,17 @@ func TestRemoveFailedPods(t *testing.T) {
}), nil),
},
},
{
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}), nil),
},
},
{
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
@@ -261,6 +275,7 @@ func TestRemoveFailedPods(t *testing.T) {
nil,
nil,
tc.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -95,8 +95,8 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
getPodsAssignedToNode,
podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
return evictable.IsEvictable(pod) &&
!nodeutil.PodFitsCurrentNode(pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes)
!nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, node) &&
nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes)
}),
)
if err != nil {

View File

@@ -222,6 +222,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -70,6 +70,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
Unschedulable: true,
}
})
node5 := test.BuildTestNode("n5", 1, 1, 1, nil)
node5 := test.BuildTestNode("n5", 2000, 3000, 10, nil)
node5.Spec.Taints = []v1.Taint{
@@ -289,6 +290,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
excludedTaints: []string{"testTaint1=test2"},
expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value
},
{
description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.",
pods: []*v1.Pod{p2, p7, p9, p10},
nodes: []*v1.Node{node1, node5},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
expectedEvictedPodCount: 0, //p2 and p7 can't be evicted
nodeFit: true,
},
}
for _, tc := range tests {
@@ -324,6 +334,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes,
getPodsAssignedToNode,
tc.evictLocalStoragePods,
tc.evictSystemCriticalPods,
false,

View File

@@ -83,7 +83,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
"Pods", thresholds[v1.ResourcePods],
}
for name := range thresholds {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
}
}
@@ -164,7 +164,7 @@ func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
for name := range thresholds {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
targetThresholds[name] = MaxResourcePercentage
}
}

View File

@@ -385,6 +385,50 @@ func TestHighNodeUtilization(t *testing.T) {
},
expectedPodsEvicted: 0,
},
{
name: "Other node does not have enough Memory",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
// A pod requesting more memory than is available on node1
test.SetRSOwnerRef(pod)
}),
},
expectedPodsEvicted: 0,
},
}
for _, testCase := range testCases {
@@ -463,6 +507,7 @@ func TestHighNodeUtilization(t *testing.T) {
nil,
nil,
testCase.nodes,
getPodsAssignedToNode,
false,
false,
false,
@@ -668,6 +713,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
&item.evictionsExpected,
nil,
item.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -111,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
"Pods", thresholds[v1.ResourcePods],
}
for name := range thresholds {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
}
}
@@ -125,7 +125,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
"Pods", targetThresholds[v1.ResourcePods],
}
for name := range targetThresholds {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name]))
}
}

View File

@@ -695,6 +695,112 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsEvicted: 2,
evictedPods: []string{},
},
{
name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 500, 200, 9, nil),
test.BuildTestNode(n2NodeName, 200, 200, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: notMatchingNodeSelectorValue,
}
}),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 10, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 10, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p4", 400, 100, n1NodeName, func(pod *v1.Pod) {
// A pod that requests too much CPU
test.SetNormalOwnerRef(pod)
}),
test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 3,
},
}
for _, test := range testCases {
@@ -772,6 +878,7 @@ func TestLowNodeUtilization(t *testing.T) {
nil,
nil,
test.nodes,
getPodsAssignedToNode,
false,
false,
false,
@@ -1086,6 +1193,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
&item.evictionsExpected,
nil,
item.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -27,6 +27,8 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/node"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
@@ -268,7 +270,7 @@ func evictPodsFromSourceNodes(
"Pods", totalAvailableUsage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !isBasicResource(name) {
if !node.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
@@ -277,7 +279,7 @@ func evictPodsFromSourceNodes(
for _, node := range sourceNodes {
klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
nonRemovablePods, removablePods := classifyPods(ctx, node.allPods, podFilter)
klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
if len(removablePods) == 0 {
@@ -338,7 +340,7 @@ func evictPods(
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
@@ -361,7 +363,7 @@ func sortNodesByUsage(nodes []NodeInfo, ascending bool) {
// extended resources
for name := range nodes[i].usage {
if !isBasicResource(name) {
if !nodeutil.IsBasicResource(name) {
ti = ti + nodes[i].usage[name].Value()
tj = tj + nodes[j].usage[name].Value()
}
@@ -411,44 +413,7 @@ func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName {
return resourceNames
}
// isBasicResource checks if resource is basic native.
func isBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
return false
}
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !isBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
func classifyPods(ctx context.Context, pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods {

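The nodeUtilization helper shown above seeds v1.ResourcePods with the pod count and only then sums the per-resource requests (via utils.PodRequestsAndLimits) for the remaining resource names. A hedged sketch of what a caller inside this package would observe, assuming two pods that request 100m and 200m of CPU respectively:

	// usage := nodeUtilization(node, []*v1.Pod{p1, p2}, []v1.ResourceName{v1.ResourceCPU})
	// usage[v1.ResourcePods].Value()     // 2, seeded from len(pods)
	// usage[v1.ResourceCPU].MilliValue() // 300, the summed container requests
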
View File

@@ -47,6 +47,7 @@ func TestPodAntiAffinity(t *testing.T) {
Unschedulable: true,
}
})
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
@@ -174,6 +175,14 @@ func TestPodAntiAffinity(t *testing.T) {
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 0,
},
{
description: "Won't evict pods because only other node doesn't have enough resources",
maxPodsToEvictPerNode: &uint3,
pods: []*v1.Pod{p1, p2, p3, p4},
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
nodeFit: true,
},
}
for _, test := range tests {
@@ -209,6 +218,7 @@ func TestPodAntiAffinity(t *testing.T) {
test.maxPodsToEvictPerNode,
test.maxNoOfPodsToEvictPerNamespace,
test.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -298,6 +298,7 @@ func TestPodLifeTime(t *testing.T) {
nil,
nil,
tc.nodes,
getPodsAssignedToNode,
false,
false,
tc.ignorePvcPods,

View File

@@ -97,8 +97,10 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
Unschedulable: true,
}
})
node4 := test.BuildTestNode("node4", 200, 3000, 10, nil)
node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil)
pods := initPods(node1)
pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil))
createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy {
return api.DeschedulerStrategy{
@@ -199,6 +201,20 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node does not have enough CPU, 0 pod evictions",
strategy: createStrategy(true, true, 1, true),
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node has enough CPU, 3 pod evictions",
strategy: createStrategy(true, true, 1, true),
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
},
}
for _, tc := range tests {
@@ -234,6 +250,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -18,20 +18,18 @@ package strategies
import (
"context"
"fmt"
"math"
"sort"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
"sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
"sigs.k8s.io/descheduler/pkg/utils"
@@ -170,7 +168,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
continue
}
balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodeMap)
balanceDomains(ctx, client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes)
}
}
@@ -225,15 +223,18 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
// [5, 5, 5, 5, 5, 5]
// (assuming even distribution by the scheduler of the evicted pods)
func balanceDomains(
ctx context.Context,
client clientset.Interface,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
podsForEviction map[*v1.Pod]struct{},
constraint v1.TopologySpreadConstraint,
constraintTopologies map[topologyPair][]*v1.Pod,
sumPods float64,
isEvictable func(*v1.Pod) bool,
nodeMap map[string]*v1.Node) {
isEvictable func(pod *v1.Pod) bool,
nodes []*v1.Node) {
idealAvg := sumPods / float64(len(constraintTopologies))
sortedDomains := sortDomains(constraintTopologies, isEvictable)
sortedDomains := sortDomains(ctx, constraintTopologies, isEvictable)
// i is the index for belowOrEqualAvg
// j is the index for aboveAvg
i := 0
@@ -273,8 +274,8 @@ func balanceDomains(
// also (just for tracking), add them to the list of pods in the lower topology
aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:]
for k := range aboveToEvict {
if err := validatePodFitsOnOtherNodes(aboveToEvict[k], nodeMap); err != nil {
klog.V(2).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(aboveToEvict[k]))
if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
continue
}
@@ -285,56 +286,6 @@ func balanceDomains(
}
}
// validatePodFitsOnOtherNodes performs validation based on scheduling predicates for affinity and toleration.
// It excludes the current node because, for the sake of domain balancing only, we care about if there is any other
// place it could theoretically fit.
// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading
func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error {
// if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node,
// don't bother evicting it as it will just end up back on the same node
// however we still account for it "being evicted" so the algorithm can complete
// TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod
// but still try to get the required # of movePods (instead of just chopping that value off the slice above)
isRequiredDuringSchedulingIgnoredDuringExecution := pod.Spec.Affinity != nil &&
pod.Spec.Affinity.NodeAffinity != nil &&
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil
hardTaintsFilter := func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
}
var eligibleNodesCount, ineligibleAffinityNodesCount, ineligibleTaintedNodesCount int
for _, node := range nodeMap {
if node == nodeMap[pod.Spec.NodeName] {
continue
}
if pod.Spec.NodeSelector != nil || isRequiredDuringSchedulingIgnoredDuringExecution {
if !nodeutil.PodFitsCurrentNode(pod, node) {
ineligibleAffinityNodesCount++
continue
}
}
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, hardTaintsFilter) {
ineligibleTaintedNodesCount++
continue
}
eligibleNodesCount++
}
if eligibleNodesCount == 0 {
var errs []error
if ineligibleAffinityNodesCount > 0 {
errs = append(errs, fmt.Errorf("%d nodes with ineligible selector/affinity", ineligibleAffinityNodesCount))
}
if ineligibleTaintedNodesCount > 0 {
errs = append(errs, fmt.Errorf("%d nodes with taints that are not tolerated", ineligibleTaintedNodesCount))
}
return utilerrors.NewAggregate(errs)
}
return nil
}
// sortDomains sorts and splits the list of topology domains based on their size
// it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
// 1. non-evictable pods
@@ -342,7 +293,7 @@ func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error
// 3. pods in descending priority
// 4. all other pods
// We then pop pods off the back of the list for eviction
func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(*v1.Pod) bool) []topology {
func sortDomains(ctx context.Context, constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology {
sortedTopologies := make([]topology, 0, len(constraintTopologyPairs))
// sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted)
for pair, list := range constraintTopologyPairs {

View File

@@ -483,6 +483,38 @@ func TestTopologySpreadConstraint(t *testing.T) {
},
namespaces: []string{"ns1"},
},
{
name: "2 domains size [2 6], maxSkew=2, can't move any because node1 does not have enough CPU",
nodes: []*v1.Node{
test.BuildTestNode("n1", 200, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(2),
},
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
},
{
count: 6,
node: "n2",
labels: map[string]string{"foo": "bar"},
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: true,
},
},
namespaces: []string{"ns1"},
},
{
// see https://github.com/kubernetes-sigs/descheduler/issues/564
name: "Multiple constraints (6 nodes/2 zones, 4 pods)",
@@ -686,7 +718,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
namespaces: []string{"ns1"},
},
{
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node",
name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node",
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
@@ -718,6 +750,43 @@ func TestTopologySpreadConstraint(t *testing.T) {
strategy: api.DeschedulerStrategy{},
namespaces: []string{"ns1"},
},
{
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node, and NodeFit is enabled",
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
n.Labels["zone"] = "zoneB"
n.Spec.Taints = []v1.Taint{
{
Key: "taint-test",
Value: "test",
Effect: v1.TaintEffectNoSchedule,
},
}
}),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "n1",
labels: map[string]string{"foo": "bar"},
nodeSelector: map[string]string{"zone": "zoneA"},
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: true,
},
},
namespaces: []string{"ns1"},
},
{
name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint",
nodes: []*v1.Node{
@@ -902,6 +971,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
nil,
nil,
tc.nodes,
getPodsAssignedToNode,
false,
false,
false,

View File

@@ -6,25 +6,9 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
"k8s.io/klog/v2"
)
const (
// owner: @jinxu
// beta: v1.10
//
// New local storage types to support local storage capacity isolation
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
// owner: @egernst
// alpha: v1.16
//
// Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass
PodOverhead featuregate.Feature = "PodOverhead"
)
// GetResourceRequest finds and returns the request value for a specific resource.
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
if resource == v1.ResourcePods {
@@ -53,11 +37,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
}
if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
return requestQuantity
}
for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity)
@@ -72,9 +51,9 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
}
}
// if PodOverhead feature is supported, add overhead for running a pod
// to the total requests if the resource total is non-zero
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
// We assume pod overhead feature gate is enabled.
// We can't import the scheduler settings so we will inherit the default.
if pod.Spec.Overhead != nil {
if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
requestQuantity.Add(podOverhead)
}
@@ -162,9 +141,9 @@ func PodRequestsAndLimits(pod *v1.Pod) (reqs, limits v1.ResourceList) {
maxResourceList(limits, container.Resources.Limits)
}
// if PodOverhead feature is supported, add overhead for running a pod
// to the sum of reqeuests and to non-zero limits:
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
// We assume pod overhead feature gate is enabled.
// We can't import the scheduler settings so we will inherit the default.
if pod.Spec.Overhead != nil {
addResourceList(reqs, pod.Spec.Overhead)
for name, quantity := range pod.Spec.Overhead {

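With the feature-gate checks removed, pod Overhead now always contributes to the request totals that the NodeFit check compares against node allocatable. A minimal, self-contained illustration under invented values (PodRequestsAndLimits is the function changed above):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{{
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						v1.ResourceCPU: resource.MustParse("2"),
					},
				},
			}},
			// RuntimeClass overhead is now added unconditionally.
			Overhead: v1.ResourceList{
				v1.ResourceCPU: resource.MustParse("1"),
			},
		},
	}
	reqs, _ := utils.PodRequestsAndLimits(pod)
	cpu := reqs[v1.ResourceCPU]
	fmt.Println(cpu.String()) // "3": the 2-core container request plus 1 core of overhead
}
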
View File

@@ -144,6 +144,7 @@ func TestRemoveDuplicates(t *testing.T) {
nil,
nil,
nodes,
getPodsAssignedToNode,
true,
false,
false,

View File

@@ -83,7 +83,7 @@ func TestFailedPods(t *testing.T) {
defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy})
waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed)
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
t.Logf("Running RemoveFailedPods strategy for %s", name)
strategies.RemoveFailedPods(

View File

@@ -39,7 +39,6 @@ import (
v1qos "k8s.io/kubectl/pkg/util/qos"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
@@ -199,6 +198,7 @@ func runPodLifetimeStrategy(
nil,
nil,
nodes,
getPodsAssignedToNode,
false,
evictCritical,
false,
@@ -324,7 +324,7 @@ func TestLowNodeUtilization(t *testing.T) {
waitForRCPodsRunning(ctx, t, clientSet, rc)
// Run LowNodeUtilization strategy
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
podFilter, err := podutil.NewOptions().WithFilter(podEvictor.Evictable().IsEvictable).BuildFilterFunc()
if err != nil {
@@ -886,17 +886,6 @@ func TestEvictAnnotation(t *testing.T) {
clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t)
defer close(stopCh)
nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Error listing node with %v", err)
}
var nodes []*v1.Node
for i := range nodeList.Items {
node := nodeList.Items[i]
nodes = append(nodes, &node)
}
testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
t.Fatalf("Unable to create ns %v", testNamespace.Name)
@@ -977,7 +966,7 @@ func TestDeschedulingInterval(t *testing.T) {
}
s.Client = clientSet
deschedulerPolicy := &api.DeschedulerPolicy{}
deschedulerPolicy := &deschedulerapi.DeschedulerPolicy{}
c := make(chan bool, 1)
go func() {
@@ -1349,7 +1338,7 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) {
return allNodes, workerNodes
}
func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor {
func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, nodes []*v1.Node) *evictions.PodEvictor {
evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet)
if err != nil || len(evictionPolicyGroupVersion) == 0 {
t.Fatalf("Error creating eviction policy group: %v", err)
@@ -1361,6 +1350,7 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*
nil,
nil,
nodes,
getPodsAssignedToNode,
true,
false,
false,

View File

@@ -137,6 +137,7 @@ func TestTooManyRestarts(t *testing.T) {
nil,
nil,
nodes,
getPodsAssignedToNode,
true,
false,
false,

View File

@@ -77,7 +77,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
defer deleteRC(ctx, t, clientSet, violatorRc)
waitForRCPodsRunning(ctx, t, clientSet, violatorRc)
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
// Run TopologySpreadConstraint strategy
t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)