mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 05:14:13 +01:00
Merge pull request #790 from ingvagabund/636
Added request considerations to NodeFit Feature [#636 follow up]
This commit is contained in:
@@ -737,8 +737,9 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize
|
||||
|
||||
If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
|
||||
- A `nodeSelector` on the pod
|
||||
- Any `Tolerations` on the pod and any `Taints` on the other nodes
|
||||
- Any `tolerations` on the pod and any `taints` on the other nodes
|
||||
- `nodeAffinity` on the pod
|
||||
- Resource `requests` made by the pod and the resources available on other nodes
|
||||
- Whether any of the other nodes are marked as `unschedulable`
|
||||
|
||||
E.g.
|
||||
|
||||
@@ -283,6 +283,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
|
||||
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
|
||||
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
evictLocalStoragePods,
|
||||
evictSystemCriticalPods,
|
||||
ignorePvcPods,
|
||||
|
||||
@@ -51,6 +51,7 @@ type namespacePodEvictCount map[string]uint
|
||||
type PodEvictor struct {
|
||||
client clientset.Interface
|
||||
nodes []*v1.Node
|
||||
nodeIndexer podutil.GetPodsAssignedToNodeFunc
|
||||
policyGroupVersion string
|
||||
dryRun bool
|
||||
maxPodsToEvictPerNode *uint
|
||||
@@ -71,6 +72,7 @@ func NewPodEvictor(
|
||||
maxPodsToEvictPerNode *uint,
|
||||
maxPodsToEvictPerNamespace *uint,
|
||||
nodes []*v1.Node,
|
||||
nodeIndexer podutil.GetPodsAssignedToNodeFunc,
|
||||
evictLocalStoragePods bool,
|
||||
evictSystemCriticalPods bool,
|
||||
ignorePvcPods bool,
|
||||
@@ -87,6 +89,7 @@ func NewPodEvictor(
|
||||
return &PodEvictor{
|
||||
client: client,
|
||||
nodes: nodes,
|
||||
nodeIndexer: nodeIndexer,
|
||||
policyGroupVersion: policyGroupVersion,
|
||||
dryRun: dryRun,
|
||||
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
|
||||
@@ -296,7 +299,7 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
|
||||
}
|
||||
if options.nodeFit {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if !nodeutil.PodFitsAnyOtherNode(pod, pe.nodes) {
|
||||
if !nodeutil.PodFitsAnyOtherNode(pe.nodeIndexer, pod, pe.nodes) {
|
||||
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -21,8 +21,10 @@ import (
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
@@ -80,9 +82,9 @@ func TestIsEvictable(t *testing.T) {
|
||||
nodeLabelKey := "datacenter"
|
||||
nodeLabelValue := "east"
|
||||
type testCase struct {
|
||||
pod *v1.Pod
|
||||
description string
|
||||
pods []*v1.Pod
|
||||
nodes []*v1.Node
|
||||
runBefore func(*v1.Pod, []*v1.Node)
|
||||
evictFailedBarePods bool
|
||||
evictLocalStoragePods bool
|
||||
evictSystemCriticalPods bool
|
||||
@@ -92,80 +94,96 @@ func TestIsEvictable(t *testing.T) {
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{ // Failed pod eviction with no ownerRefs.
|
||||
pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
{
|
||||
description: "Failed pod eviction with no ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Status.Phase = v1.PodFailed
|
||||
}),
|
||||
},
|
||||
evictFailedBarePods: false,
|
||||
result: false,
|
||||
}, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled
|
||||
pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
},
|
||||
}, {
|
||||
description: "Normal pod eviction with no ownerRefs and evictFailedBarePods enabled",
|
||||
pods: []*v1.Pod{test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil)},
|
||||
evictFailedBarePods: true,
|
||||
result: false,
|
||||
}, { // Failed pod eviction with no ownerRefs
|
||||
pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Failed pod eviction with no ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Status.Phase = v1.PodFailed
|
||||
}),
|
||||
},
|
||||
evictFailedBarePods: true,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with normal ownerRefs
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Normal pod eviction with normal ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with replicaSet ownerRefs
|
||||
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
}, {
|
||||
description: "Normal pod eviction with replicaSet ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p4", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with statefulSet ownerRefs
|
||||
pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
|
||||
}, {
|
||||
description: "Normal pod eviction with statefulSet ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p18", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p19", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false
|
||||
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod not evicted because it is bound to a PV and evictLocalStoragePods = false",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p5", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
@@ -177,13 +195,15 @@ func TestIsEvictable(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true
|
||||
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p6", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
@@ -195,13 +215,15 @@ func TestIsEvictable(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: true,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
@@ -214,139 +236,167 @@ func TestIsEvictable(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod not evicted becasuse it is part of a daemonSet
|
||||
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it is part of a daemonSet",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, { // Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod not evicted becasuse it is a mirror pod
|
||||
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it is a mirror poddsa",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, { // Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod not evicted becasuse it has system critical priority
|
||||
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it has system critical priority",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, { // Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
pod.Annotations = map[string]string{
|
||||
"descheduler.alpha.kubernetes.io/evict": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, { // Pod not evicted becasuse it has a priority higher than the configured priority threshold
|
||||
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it has a priority higher than the configured priority threshold",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: false,
|
||||
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true
|
||||
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
result: true,
|
||||
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
result: true,
|
||||
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true
|
||||
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
|
||||
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, { // Pod with no tolerations running on normal node, all other nodes tainted
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
}, {
|
||||
description: "Pod with no tolerations running on normal node, all other nodes tainted",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
for _, node := range nodes {
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
@@ -354,14 +404,26 @@ func TestIsEvictable(t *testing.T) {
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, { // Pod with correct tolerations running on normal node, all other nodes tainted
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
}, {
|
||||
description: "Pod with correct tolerations running on normal node, all other nodes tainted",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
@@ -370,11 +432,9 @@ func TestIsEvictable(t *testing.T) {
|
||||
},
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
for _, node := range nodes {
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
@@ -382,67 +442,216 @@ func TestIsEvictable(t *testing.T) {
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, { // Pod with incorrect node selector
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
}, {
|
||||
description: "Pod with incorrect node selector",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: "fail",
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
for _, node := range nodes {
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, { // Pod with correct node selector
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
}, {
|
||||
description: "Pod with correct node selector",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
|
||||
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
for _, node := range nodes {
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with correct node selector, but only available node doesn't have enough CPU",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2-TEST", 10, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3-TEST", 10, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod with correct node selector, and one node has enough memory",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node2-pod-10GB-mem", 20, 10, "node2", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node3-pod-10GB-mem", 20, 10, "node3", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 100, 20, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with correct node selector, but both nodes don't have enough memory",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node2-pod-10GB-mem", 10, 10, "node2", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node3-pod-10GB-mem", 10, 10, "node3", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
test.runBefore(test.pod, test.nodes)
|
||||
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
nodes := append(test.nodes, n1)
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range test.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range test.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := &PodEvictor{
|
||||
client: fakeClient,
|
||||
nodes: nodes,
|
||||
nodeIndexer: getPodsAssignedToNode,
|
||||
policyGroupVersion: policyv1.SchemeGroupVersion.String(),
|
||||
dryRun: false,
|
||||
maxPodsToEvictPerNode: nil,
|
||||
maxPodsToEvictPerNamespace: nil,
|
||||
evictLocalStoragePods: test.evictLocalStoragePods,
|
||||
evictSystemCriticalPods: test.evictSystemCriticalPods,
|
||||
evictFailedBarePods: test.evictFailedBarePods,
|
||||
nodes: nodes,
|
||||
}
|
||||
|
||||
evictable := podEvictor.Evictable()
|
||||
var opts []func(opts *Options)
|
||||
if test.priorityThreshold != nil {
|
||||
opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
|
||||
@@ -450,13 +659,13 @@ func TestIsEvictable(t *testing.T) {
|
||||
if test.nodeFit {
|
||||
opts = append(opts, WithNodeFit(true))
|
||||
}
|
||||
evictable = podEvictor.Evictable(opts...)
|
||||
evictable := podEvictor.Evictable(opts...)
|
||||
|
||||
result := evictable.IsEvictable(test.pod)
|
||||
result := evictable.IsEvictable(test.pods[0])
|
||||
if result != test.result {
|
||||
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
|
||||
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pods[0].Name, test.result, result)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
func TestPodTypes(t *testing.T) {
|
||||
|
||||
@@ -18,13 +18,16 @@ package node
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
@@ -96,32 +99,90 @@ func IsReady(node *v1.Node) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node
|
||||
// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector
|
||||
// matching the node label (including affinity), the taints on the node, and the node being schedulable or not.
|
||||
func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
// NodeFit returns true if the provided pod can be scheduled onto the provided node.
|
||||
// This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
|
||||
// This function currently considers a subset of the Kubernetes Scheduler's predicates when
|
||||
// deciding if a pod would fit on a node, but more predicates may be added in the future.
|
||||
func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
|
||||
// Check node selector and required affinity
|
||||
var errors []error
|
||||
if ok, err := utils.PodMatchNodeSelector(pod, node); err != nil {
|
||||
errors = append(errors, err)
|
||||
} else if !ok {
|
||||
errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
|
||||
}
|
||||
// Check taints (we only care about NoSchedule and NoExecute taints)
|
||||
ok := utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
})
|
||||
if !ok {
|
||||
errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
|
||||
}
|
||||
// Check if the pod can fit on a node based off it's requests
|
||||
ok, reqErrors := fitsRequest(nodeIndexer, pod, node)
|
||||
if !ok {
|
||||
errors = append(errors, reqErrors...)
|
||||
}
|
||||
// Check if node is schedulable
|
||||
if IsNodeUnschedulable(node) {
|
||||
errors = append(errors, fmt.Errorf("node is not schedulable"))
|
||||
}
|
||||
|
||||
return errors
|
||||
}
|
||||
|
||||
// PodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node
|
||||
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
// Skip node pod is already on
|
||||
if node.Name == pod.Spec.NodeName {
|
||||
continue
|
||||
}
|
||||
// Check node selector and required affinity
|
||||
ok, err := utils.PodMatchNodeSelector(pod, node)
|
||||
if err != nil || !ok {
|
||||
continue
|
||||
}
|
||||
// Check taints (we only care about NoSchedule and NoExecute taints)
|
||||
ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
// Check if node is schedulable
|
||||
if !IsNodeUnschedulable(node) {
|
||||
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsAnyNode checks if the given pod will fit any of the given nodes. The predicates used
|
||||
// to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsCurrentNode checks if the given pod will fit onto the given node. The predicates used
|
||||
// to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
return false
|
||||
@@ -133,39 +194,95 @@ func IsNodeUnschedulable(node *v1.Node) bool {
|
||||
return node.Spec.Unschedulable
|
||||
}
|
||||
|
||||
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on
|
||||
// multiple criteria, like, pod node selector matching the node label, node
|
||||
// being schedulable or not.
|
||||
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
// fitsRequest determines if a pod can fit on a node based on its resource requests. It returns true if
|
||||
// the pod will fit.
|
||||
func fitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
|
||||
var insufficientResources []error
|
||||
|
||||
ok, err := utils.PodMatchNodeSelector(pod, node)
|
||||
if err != nil || !ok {
|
||||
continue
|
||||
// Get pod requests
|
||||
podRequests, _ := utils.PodRequestsAndLimits(pod)
|
||||
resourceNames := make([]v1.ResourceName, 0, len(podRequests))
|
||||
for name := range podRequests {
|
||||
resourceNames = append(resourceNames, name)
|
||||
}
|
||||
if !IsNodeUnschedulable(node) {
|
||||
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
|
||||
// node selector matches the node label.
|
||||
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
|
||||
ok, err := utils.PodMatchNodeSelector(pod, node)
|
||||
|
||||
availableResources, err := nodeAvailableResources(nodeIndexer, node, resourceNames)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to match node selector")
|
||||
return false
|
||||
return false, []error{err}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return false
|
||||
podFitsOnNode := true
|
||||
for _, resource := range resourceNames {
|
||||
podResourceRequest := podRequests[resource]
|
||||
availableResource, ok := availableResources[resource]
|
||||
if !ok || podResourceRequest.MilliValue() > availableResource.MilliValue() {
|
||||
insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
|
||||
podFitsOnNode = false
|
||||
}
|
||||
}
|
||||
return podFitsOnNode, insufficientResources
|
||||
}
|
||||
|
||||
// nodeAvailableResources returns resources mapped to the quanitity available on the node.
|
||||
func nodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
|
||||
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeUtilization := NodeUtilization(podsOnNode, resourceNames)
|
||||
remainingResources := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-nodeUtilization[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
|
||||
v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-nodeUtilization[v1.ResourceMemory].Value(), resource.BinarySI),
|
||||
v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-nodeUtilization[v1.ResourcePods].Value(), resource.DecimalSI),
|
||||
}
|
||||
for _, name := range resourceNames {
|
||||
if !IsBasicResource(name) {
|
||||
if _, exists := node.Status.Allocatable[name]; exists {
|
||||
allocatableResource := node.Status.Allocatable[name]
|
||||
remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-nodeUtilization[name].Value(), resource.DecimalSI)
|
||||
} else {
|
||||
remainingResources[name] = resource.NewQuantity(0, resource.DecimalSI)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return remainingResources, nil
|
||||
}
|
||||
|
||||
// NodeUtilization returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
|
||||
func NodeUtilization(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
|
||||
totalReqs := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
|
||||
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
|
||||
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
|
||||
}
|
||||
for _, name := range resourceNames {
|
||||
if !IsBasicResource(name) {
|
||||
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
req, _ := utils.PodRequestsAndLimits(pod)
|
||||
for _, name := range resourceNames {
|
||||
quantity, ok := req[name]
|
||||
if ok && name != v1.ResourcePods {
|
||||
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
|
||||
// the format of the quantity will be updated to the format of y.
|
||||
totalReqs[name].Add(quantity)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return totalReqs
|
||||
}
|
||||
|
||||
// IsBasicResource checks if resource is basic native.
|
||||
func IsBasicResource(name v1.ResourceName) bool {
|
||||
switch name {
|
||||
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
||||
klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -21,9 +21,12 @@ import (
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
@@ -147,13 +150,13 @@ func TestPodFitsCurrentNode(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
node: &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
@@ -181,27 +184,48 @@ func TestPodFitsCurrentNode(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
node: &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
success: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
actual := PodFitsCurrentNode(tc.pod, tc.node)
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
objs = append(objs, tc.node)
|
||||
objs = append(objs, tc.pod)
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodFitsAnyOtherNode(t *testing.T) {
|
||||
|
||||
nodeLabelKey := "kubernetes.io/desiredNode"
|
||||
nodeLabelValue := "yes"
|
||||
nodeTaintKey := "hardware"
|
||||
@@ -215,238 +239,527 @@ func TestPodFitsAnyOtherNode(t *testing.T) {
|
||||
pod *v1.Pod
|
||||
nodes []*v1.Node
|
||||
success bool
|
||||
podsOnNodes []*v1.Pod
|
||||
}{
|
||||
{
|
||||
description: "Pod fits another node matching node affinity",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Pod expected to fit one of the nodes",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Pod expected to fit none of the nodes",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "unfit1",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "unfit2",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Nodes are unschedulable but labels match, should fail",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Unschedulable: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Unschedulable = true
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, should pass",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
description: "Both nodes are tained, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Taints: []v1.Taint{
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
description: "Two nodes matches node selector, one of them is tained, there is a pod on the available node, and requests are low, should pass",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Both nodes are tained, should fail",
|
||||
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[0],
|
||||
Labels: map[string]string{
|
||||
description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Taints: []v1.Taint{
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores
|
||||
test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[1],
|
||||
Labels: map[string]string{
|
||||
description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Taints: []v1.Taint{
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoExecute,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: nodeNames[2],
|
||||
description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 20GB of storage, the pod already on the node below requests 11GB, and the pod above requests 10GB
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
|
||||
}),
|
||||
// Notice that this node only has 15 of the custom resource, the pod already on the node below requests 10, and the pod above requests 10
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass",
|
||||
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node has 5 CPU cores, the pod below requests 2 cores, and has CPU overhead of 1 cores, and the pod above requests 1 core
|
||||
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
|
||||
}),
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 5 CPU cores, the pod below requests 2 cores, but has CPU overhead of 2 cores, and the pod above requests 2 cores
|
||||
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
actual := PodFitsAnyOtherNode(tc.pod, tc.nodes)
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range tc.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range tc.podsOnNodes {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
objs = append(objs, tc.pod)
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod {
|
||||
return (&v1.Pod{
|
||||
Spec: v1.PodSpec{
|
||||
NodeName: nodeName,
|
||||
Affinity: &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: nodeSelectorKey,
|
||||
Operator: "In",
|
||||
Values: []string{
|
||||
nodeSelectorValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
// createResourceList builds a small resource list of core resources
|
||||
func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList {
|
||||
resourceList := make(map[v1.ResourceName]resource.Quantity)
|
||||
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI)
|
||||
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI)
|
||||
resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
|
||||
return resourceList
|
||||
}
|
||||
|
||||
@@ -67,6 +67,7 @@ func TestFindDuplicatePods(t *testing.T) {
|
||||
Unschedulable: true,
|
||||
}
|
||||
})
|
||||
node6 := test.BuildTestNode("n6", 200, 200, 10, nil)
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
|
||||
p1.Namespace = "dev"
|
||||
@@ -102,6 +103,14 @@ func TestFindDuplicatePods(t *testing.T) {
|
||||
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
|
||||
p18.Namespace = "node-fit"
|
||||
|
||||
// This pod sits on node6 and is used to take up CPU requests on the node
|
||||
p19 := test.BuildTestPod("CPU-eater", 150, 150, node6.Name, nil)
|
||||
p19.Namespace = "test"
|
||||
|
||||
// Dummy pod for node6 used to do the opposite of p19
|
||||
p20 := test.BuildTestPod("CPU-saver", 100, 150, node6.Name, nil)
|
||||
p20.Namespace = "test"
|
||||
|
||||
// ### Evictable Pods ###
|
||||
|
||||
// Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted.
|
||||
@@ -263,6 +272,20 @@ func TestFindDuplicatePods(t *testing.T) {
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available does not have enough CPU, and nodeFit set to true. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p19},
|
||||
nodes: []*v1.Node{node1, node6},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available has enough CPU, and nodeFit set to true. 1 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p20},
|
||||
nodes: []*v1.Node{node1, node6},
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
@@ -297,6 +320,7 @@ func TestFindDuplicatePods(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
@@ -724,6 +748,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -166,9 +166,12 @@ func TestRemoveFailedPods(t *testing.T) {
|
||||
{
|
||||
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
|
||||
strategy: createStrategy(true, false, nil, nil, nil, true),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) {
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
|
||||
node.Spec.Unschedulable = true
|
||||
})},
|
||||
}),
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
@@ -176,6 +179,17 @@ func TestRemoveFailedPods(t *testing.T) {
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, true),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
|
||||
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
|
||||
@@ -261,6 +275,7 @@ func TestRemoveFailedPods(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -95,8 +95,8 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
|
||||
getPodsAssignedToNode,
|
||||
podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
|
||||
return evictable.IsEvictable(pod) &&
|
||||
!nodeutil.PodFitsCurrentNode(pod, node) &&
|
||||
nodeutil.PodFitsAnyNode(pod, nodes)
|
||||
!nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, node) &&
|
||||
nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes)
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
|
||||
@@ -222,6 +222,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
|
||||
tc.maxPodsToEvictPerNode,
|
||||
tc.maxNoOfPodsToEvictPerNamespace,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -76,6 +76,11 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
|
||||
createPreferNoScheduleTaint("testTaint", "test", 1),
|
||||
}
|
||||
|
||||
node6 := test.BuildTestNode("n6", 1, 1, 1, nil)
|
||||
node6.Spec.Taints = []v1.Taint{
|
||||
createPreferNoScheduleTaint("testTaint", "test", 1),
|
||||
}
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
|
||||
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
|
||||
@@ -289,6 +294,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
|
||||
excludedTaints: []string{"testTaint1=test2"},
|
||||
expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value
|
||||
},
|
||||
{
|
||||
description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.",
|
||||
pods: []*v1.Pod{p2, p7, p9, p10},
|
||||
nodes: []*v1.Node{node1, node6},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
expectedEvictedPodCount: 0, //p2 and p7 can't be evicted
|
||||
nodeFit: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
@@ -324,6 +338,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
|
||||
tc.maxPodsToEvictPerNode,
|
||||
tc.maxNoOfPodsToEvictPerNamespace,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
tc.evictLocalStoragePods,
|
||||
tc.evictSystemCriticalPods,
|
||||
false,
|
||||
|
||||
@@ -83,7 +83,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
|
||||
"Pods", thresholds[v1.ResourcePods],
|
||||
}
|
||||
for name := range thresholds {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
|
||||
}
|
||||
}
|
||||
@@ -164,7 +164,7 @@ func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds
|
||||
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
|
||||
for name := range thresholds {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
targetThresholds[name] = MaxResourcePercentage
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,6 +385,50 @@ func TestHighNodeUtilization(t *testing.T) {
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "Other node does not have enough Memory",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod requesting more memory than is available on node1
|
||||
test.SetRSOwnerRef(pod)
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "Other node does not have enough Memory",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod requesting more memory than is available on node1
|
||||
test.SetRSOwnerRef(pod)
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
@@ -463,6 +507,7 @@ func TestHighNodeUtilization(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
@@ -668,6 +713,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
|
||||
&item.evictionsExpected,
|
||||
nil,
|
||||
item.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -111,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
|
||||
"Pods", thresholds[v1.ResourcePods],
|
||||
}
|
||||
for name := range thresholds {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
|
||||
}
|
||||
}
|
||||
@@ -125,7 +125,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
|
||||
"Pods", targetThresholds[v1.ResourcePods],
|
||||
}
|
||||
for name := range targetThresholds {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name]))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -772,6 +772,7 @@ func TestLowNodeUtilization(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
test.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
@@ -1086,6 +1087,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
|
||||
&item.evictionsExpected,
|
||||
nil,
|
||||
item.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -27,6 +27,8 @@ import (
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
@@ -155,7 +157,7 @@ func getNodeUsage(
|
||||
|
||||
nodeUsageList = append(nodeUsageList, NodeUsage{
|
||||
node: node,
|
||||
usage: nodeUtilization(node, pods, resourceNames),
|
||||
usage: nodeutil.NodeUtilization(pods, resourceNames),
|
||||
allPods: pods,
|
||||
})
|
||||
}
|
||||
@@ -268,7 +270,7 @@ func evictPodsFromSourceNodes(
|
||||
"Pods", totalAvailableUsage[v1.ResourcePods].Value(),
|
||||
}
|
||||
for name := range totalAvailableUsage {
|
||||
if !isBasicResource(name) {
|
||||
if !node.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
|
||||
}
|
||||
}
|
||||
@@ -338,7 +340,7 @@ func evictPods(
|
||||
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
|
||||
}
|
||||
for name := range totalAvailableUsage {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
|
||||
}
|
||||
}
|
||||
@@ -361,7 +363,7 @@ func sortNodesByUsage(nodes []NodeInfo, ascending bool) {
|
||||
|
||||
// extended resources
|
||||
for name := range nodes[i].usage {
|
||||
if !isBasicResource(name) {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
ti = ti + nodes[i].usage[name].Value()
|
||||
tj = tj + nodes[j].usage[name].Value()
|
||||
}
|
||||
@@ -411,43 +413,6 @@ func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName {
|
||||
return resourceNames
|
||||
}
|
||||
|
||||
// isBasicResource checks if resource is basic native.
|
||||
func isBasicResource(name v1.ResourceName) bool {
|
||||
switch name {
|
||||
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
|
||||
totalReqs := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
|
||||
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
|
||||
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
|
||||
}
|
||||
for _, name := range resourceNames {
|
||||
if !isBasicResource(name) {
|
||||
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
req, _ := utils.PodRequestsAndLimits(pod)
|
||||
for _, name := range resourceNames {
|
||||
quantity, ok := req[name]
|
||||
if ok && name != v1.ResourcePods {
|
||||
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
|
||||
// the format of the quantity will be updated to the format of y.
|
||||
totalReqs[name].Add(quantity)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return totalReqs
|
||||
}
|
||||
|
||||
func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
|
||||
var nonRemovablePods, removablePods []*v1.Pod
|
||||
|
||||
@@ -472,7 +437,7 @@ func averageNodeBasicresources(nodes []*v1.Node, getPodsAssignedToNode podutil.G
|
||||
numberOfNodes--
|
||||
continue
|
||||
}
|
||||
usage := nodeUtilization(node, pods, resourceNames)
|
||||
usage := nodeutil.NodeUtilization(pods, resourceNames)
|
||||
nodeCapacity := node.Status.Capacity
|
||||
if len(node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.Status.Allocatable
|
||||
|
||||
@@ -47,6 +47,7 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
Unschedulable: true,
|
||||
}
|
||||
})
|
||||
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
|
||||
@@ -174,6 +175,14 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
nodes: []*v1.Node{node1},
|
||||
expectedEvictedPodCount: 0,
|
||||
},
|
||||
{
|
||||
description: "Won't evict pods because only other node doesn't have enough resources",
|
||||
maxPodsToEvictPerNode: &uint3,
|
||||
pods: []*v1.Pod{p1, p2, p3, p4},
|
||||
nodes: []*v1.Node{node1, node4},
|
||||
expectedEvictedPodCount: 0,
|
||||
nodeFit: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
@@ -209,6 +218,7 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
test.maxPodsToEvictPerNode,
|
||||
test.maxNoOfPodsToEvictPerNamespace,
|
||||
test.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -298,6 +298,7 @@ func TestPodLifeTime(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
tc.ignorePvcPods,
|
||||
|
||||
@@ -97,8 +97,10 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
|
||||
Unschedulable: true,
|
||||
}
|
||||
})
|
||||
node4 := test.BuildTestNode("node4", 200, 3000, 10, nil)
|
||||
node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil)
|
||||
|
||||
pods := initPods(node1)
|
||||
pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil))
|
||||
|
||||
createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32, nodeFit bool) api.DeschedulerStrategy {
|
||||
return api.DeschedulerStrategy{
|
||||
@@ -199,6 +201,20 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
|
||||
expectedEvictedPodCount: 0,
|
||||
maxPodsToEvictPerNode: &uint3,
|
||||
},
|
||||
{
|
||||
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node does not have enough CPU, 0 pod evictions",
|
||||
strategy: createStrategy(true, true, 1, true),
|
||||
nodes: []*v1.Node{node1, node4},
|
||||
expectedEvictedPodCount: 0,
|
||||
maxPodsToEvictPerNode: &uint3,
|
||||
},
|
||||
{
|
||||
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node has enough CPU, 3 pod evictions",
|
||||
strategy: createStrategy(true, true, 1, true),
|
||||
nodes: []*v1.Node{node1, node5},
|
||||
expectedEvictedPodCount: 3,
|
||||
maxPodsToEvictPerNode: &uint3,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
@@ -234,6 +250,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
|
||||
tc.maxPodsToEvictPerNode,
|
||||
tc.maxNoOfPodsToEvictPerNamespace,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -18,20 +18,18 @@ package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
@@ -170,7 +168,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
|
||||
klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
|
||||
continue
|
||||
}
|
||||
balanceDomains(podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodeMap)
|
||||
balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictable.IsEvictable, nodes)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,12 +223,14 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
|
||||
// [5, 5, 5, 5, 5, 5]
|
||||
// (assuming even distribution by the scheduler of the evicted pods)
|
||||
func balanceDomains(
|
||||
client clientset.Interface,
|
||||
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
|
||||
podsForEviction map[*v1.Pod]struct{},
|
||||
constraint v1.TopologySpreadConstraint,
|
||||
constraintTopologies map[topologyPair][]*v1.Pod,
|
||||
sumPods float64,
|
||||
isEvictable func(*v1.Pod) bool,
|
||||
nodeMap map[string]*v1.Node) {
|
||||
isEvictable func(pod *v1.Pod) bool,
|
||||
nodes []*v1.Node) {
|
||||
|
||||
idealAvg := sumPods / float64(len(constraintTopologies))
|
||||
sortedDomains := sortDomains(constraintTopologies, isEvictable)
|
||||
@@ -273,8 +273,19 @@ func balanceDomains(
|
||||
// also (just for tracking), add them to the list of pods in the lower topology
|
||||
aboveToEvict := sortedDomains[j].pods[len(sortedDomains[j].pods)-movePods:]
|
||||
for k := range aboveToEvict {
|
||||
if err := validatePodFitsOnOtherNodes(aboveToEvict[k], nodeMap); err != nil {
|
||||
klog.V(2).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(aboveToEvict[k]))
|
||||
// PodFitsAnyOtherNode excludes the current node because, for the sake of domain balancing only, we care about if there is any other
|
||||
// place it could theoretically fit.
|
||||
// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading
|
||||
// Also, if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node,
|
||||
// don't bother evicting it as it will just end up back on the same node
|
||||
// however we still account for it "being evicted" so the algorithm can complete
|
||||
// TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod
|
||||
// but still try to get the required # of movePods (instead of just chopping that value off the slice above).
|
||||
// In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes.
|
||||
// This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm.
|
||||
// So, a better selection heuristic could improve performance.
|
||||
if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
|
||||
klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -285,56 +296,6 @@ func balanceDomains(
|
||||
}
|
||||
}
|
||||
|
||||
// validatePodFitsOnOtherNodes performs validation based on scheduling predicates for affinity and toleration.
|
||||
// It excludes the current node because, for the sake of domain balancing only, we care about if there is any other
|
||||
// place it could theoretically fit.
|
||||
// If the pod doesn't fit on its current node, that is a job for RemovePodsViolatingNodeAffinity, and irrelevant to Topology Spreading
|
||||
func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error {
|
||||
// if the pod has a hard nodeAffinity/nodeSelector/toleration that only matches this node,
|
||||
// don't bother evicting it as it will just end up back on the same node
|
||||
// however we still account for it "being evicted" so the algorithm can complete
|
||||
// TODO(@damemi): Since we don't order pods wrt their affinities, we should refactor this to skip the current pod
|
||||
// but still try to get the required # of movePods (instead of just chopping that value off the slice above)
|
||||
isRequiredDuringSchedulingIgnoredDuringExecution := pod.Spec.Affinity != nil &&
|
||||
pod.Spec.Affinity.NodeAffinity != nil &&
|
||||
pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil
|
||||
|
||||
hardTaintsFilter := func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
}
|
||||
|
||||
var eligibleNodesCount, ineligibleAffinityNodesCount, ineligibleTaintedNodesCount int
|
||||
for _, node := range nodeMap {
|
||||
if node == nodeMap[pod.Spec.NodeName] {
|
||||
continue
|
||||
}
|
||||
if pod.Spec.NodeSelector != nil || isRequiredDuringSchedulingIgnoredDuringExecution {
|
||||
if !nodeutil.PodFitsCurrentNode(pod, node) {
|
||||
ineligibleAffinityNodesCount++
|
||||
continue
|
||||
}
|
||||
}
|
||||
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, hardTaintsFilter) {
|
||||
ineligibleTaintedNodesCount++
|
||||
continue
|
||||
}
|
||||
|
||||
eligibleNodesCount++
|
||||
}
|
||||
|
||||
if eligibleNodesCount == 0 {
|
||||
var errs []error
|
||||
if ineligibleAffinityNodesCount > 0 {
|
||||
errs = append(errs, fmt.Errorf("%d nodes with ineligible selector/affinity", ineligibleAffinityNodesCount))
|
||||
}
|
||||
if ineligibleTaintedNodesCount > 0 {
|
||||
errs = append(errs, fmt.Errorf("%d nodes with taints that are not tolerated", ineligibleTaintedNodesCount))
|
||||
}
|
||||
return utilerrors.NewAggregate(errs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sortDomains sorts and splits the list of topology domains based on their size
|
||||
// it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
|
||||
// 1. non-evictable pods
|
||||
@@ -342,7 +303,7 @@ func validatePodFitsOnOtherNodes(pod *v1.Pod, nodeMap map[string]*v1.Node) error
|
||||
// 3. pods in descending priority
|
||||
// 4. all other pods
|
||||
// We then pop pods off the back of the list for eviction
|
||||
func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(*v1.Pod) bool) []topology {
|
||||
func sortDomains(constraintTopologyPairs map[topologyPair][]*v1.Pod, isEvictable func(pod *v1.Pod) bool) []topology {
|
||||
sortedTopologies := make([]topology, 0, len(constraintTopologyPairs))
|
||||
// sort the topologies and return 2 lists: those <= the average and those > the average (> list inverted)
|
||||
for pair, list := range constraintTopologyPairs {
|
||||
|
||||
@@ -483,6 +483,38 @@ func TestTopologySpreadConstraint(t *testing.T) {
|
||||
},
|
||||
namespaces: []string{"ns1"},
|
||||
},
|
||||
{
|
||||
name: "2 domains size [2 6], maxSkew=2, can't move any because node1 does not have enough CPU",
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 200, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
|
||||
},
|
||||
pods: createTestPods([]testPodList{
|
||||
{
|
||||
count: 1,
|
||||
node: "n1",
|
||||
labels: map[string]string{"foo": "bar"},
|
||||
constraints: getDefaultTopologyConstraints(2),
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
node: "n1",
|
||||
labels: map[string]string{"foo": "bar"},
|
||||
},
|
||||
{
|
||||
count: 6,
|
||||
node: "n2",
|
||||
labels: map[string]string{"foo": "bar"},
|
||||
},
|
||||
}),
|
||||
expectedEvictedCount: 0,
|
||||
strategy: api.DeschedulerStrategy{
|
||||
Params: &api.StrategyParameters{
|
||||
NodeFit: true,
|
||||
},
|
||||
},
|
||||
namespaces: []string{"ns1"},
|
||||
},
|
||||
{
|
||||
// see https://github.com/kubernetes-sigs/descheduler/issues/564
|
||||
name: "Multiple constraints (6 nodes/2 zones, 4 pods)",
|
||||
@@ -686,7 +718,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
|
||||
namespaces: []string{"ns1"},
|
||||
},
|
||||
{
|
||||
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node",
|
||||
name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node",
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
|
||||
@@ -718,6 +750,43 @@ func TestTopologySpreadConstraint(t *testing.T) {
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
namespaces: []string{"ns1"},
|
||||
},
|
||||
{
|
||||
name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node, and NodeFit is enabled",
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
|
||||
n.Labels["zone"] = "zoneB"
|
||||
n.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: "taint-test",
|
||||
Value: "test",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
pods: createTestPods([]testPodList{
|
||||
{
|
||||
count: 1,
|
||||
node: "n1",
|
||||
labels: map[string]string{"foo": "bar"},
|
||||
constraints: getDefaultTopologyConstraints(1),
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
node: "n1",
|
||||
labels: map[string]string{"foo": "bar"},
|
||||
nodeSelector: map[string]string{"zone": "zoneA"},
|
||||
},
|
||||
}),
|
||||
expectedEvictedCount: 0,
|
||||
strategy: api.DeschedulerStrategy{
|
||||
Params: &api.StrategyParameters{
|
||||
NodeFit: true,
|
||||
},
|
||||
},
|
||||
namespaces: []string{"ns1"},
|
||||
},
|
||||
{
|
||||
name: "2 domains, sizes [2,0], maxSkew=1, move 1 pod for node with PreferNoSchedule Taint",
|
||||
nodes: []*v1.Node{
|
||||
@@ -902,6 +971,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -6,25 +6,9 @@ import (
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-base/featuregate"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
// owner: @jinxu
|
||||
// beta: v1.10
|
||||
//
|
||||
// New local storage types to support local storage capacity isolation
|
||||
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"
|
||||
|
||||
// owner: @egernst
|
||||
// alpha: v1.16
|
||||
//
|
||||
// Enables PodOverhead, for accounting pod overheads which are specific to a given RuntimeClass
|
||||
PodOverhead featuregate.Feature = "PodOverhead"
|
||||
)
|
||||
|
||||
// GetResourceRequest finds and returns the request value for a specific resource.
|
||||
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
|
||||
if resource == v1.ResourcePods {
|
||||
@@ -53,11 +37,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
|
||||
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
|
||||
}
|
||||
|
||||
if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(LocalStorageCapacityIsolation) {
|
||||
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
|
||||
return requestQuantity
|
||||
}
|
||||
|
||||
for _, container := range pod.Spec.Containers {
|
||||
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
|
||||
requestQuantity.Add(rQuantity)
|
||||
@@ -72,9 +51,9 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
|
||||
}
|
||||
}
|
||||
|
||||
// if PodOverhead feature is supported, add overhead for running a pod
|
||||
// to the total requests if the resource total is non-zero
|
||||
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
|
||||
// We assume pod overhead feature gate is enabled.
|
||||
// We can't import the scheduler settings so we will inherit the default.
|
||||
if pod.Spec.Overhead != nil {
|
||||
if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
|
||||
requestQuantity.Add(podOverhead)
|
||||
}
|
||||
@@ -162,9 +141,9 @@ func PodRequestsAndLimits(pod *v1.Pod) (reqs, limits v1.ResourceList) {
|
||||
maxResourceList(limits, container.Resources.Limits)
|
||||
}
|
||||
|
||||
// if PodOverhead feature is supported, add overhead for running a pod
|
||||
// to the sum of reqeuests and to non-zero limits:
|
||||
if pod.Spec.Overhead != nil && utilfeature.DefaultFeatureGate.Enabled(PodOverhead) {
|
||||
// We assume pod overhead feature gate is enabled.
|
||||
// We can't import the scheduler settings so we will inherit the default.
|
||||
if pod.Spec.Overhead != nil {
|
||||
addResourceList(reqs, pod.Spec.Overhead)
|
||||
|
||||
for name, quantity := range pod.Spec.Overhead {
|
||||
|
||||
@@ -144,6 +144,7 @@ func TestRemoveDuplicates(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -83,7 +83,7 @@ func TestFailedPods(t *testing.T) {
|
||||
defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy})
|
||||
waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed)
|
||||
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
|
||||
|
||||
t.Logf("Running RemoveFailedPods strategy for %s", name)
|
||||
strategies.RemoveFailedPods(
|
||||
|
||||
@@ -39,7 +39,6 @@ import (
|
||||
v1qos "k8s.io/kubectl/pkg/util/qos"
|
||||
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/client"
|
||||
@@ -199,6 +198,7 @@ func runPodLifetimeStrategy(
|
||||
nil,
|
||||
nil,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
evictCritical,
|
||||
false,
|
||||
@@ -324,7 +324,7 @@ func TestLowNodeUtilization(t *testing.T) {
|
||||
waitForRCPodsRunning(ctx, t, clientSet, rc)
|
||||
|
||||
// Run LowNodeUtilization strategy
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
|
||||
|
||||
podFilter, err := podutil.NewOptions().WithFilter(podEvictor.Evictable().IsEvictable).BuildFilterFunc()
|
||||
if err != nil {
|
||||
@@ -886,17 +886,6 @@ func TestEvictAnnotation(t *testing.T) {
|
||||
clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t)
|
||||
defer close(stopCh)
|
||||
|
||||
nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
t.Errorf("Error listing node with %v", err)
|
||||
}
|
||||
|
||||
var nodes []*v1.Node
|
||||
for i := range nodeList.Items {
|
||||
node := nodeList.Items[i]
|
||||
nodes = append(nodes, &node)
|
||||
}
|
||||
|
||||
testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
|
||||
if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
|
||||
t.Fatalf("Unable to create ns %v", testNamespace.Name)
|
||||
@@ -977,7 +966,7 @@ func TestDeschedulingInterval(t *testing.T) {
|
||||
}
|
||||
s.Client = clientSet
|
||||
|
||||
deschedulerPolicy := &api.DeschedulerPolicy{}
|
||||
deschedulerPolicy := &deschedulerapi.DeschedulerPolicy{}
|
||||
|
||||
c := make(chan bool, 1)
|
||||
go func() {
|
||||
@@ -1349,7 +1338,7 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) {
|
||||
return allNodes, workerNodes
|
||||
}
|
||||
|
||||
func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor {
|
||||
func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, nodes []*v1.Node) *evictions.PodEvictor {
|
||||
evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet)
|
||||
if err != nil || len(evictionPolicyGroupVersion) == 0 {
|
||||
t.Fatalf("Error creating eviction policy group: %v", err)
|
||||
@@ -1361,6 +1350,7 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*
|
||||
nil,
|
||||
nil,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -137,6 +137,7 @@ func TestTooManyRestarts(t *testing.T) {
|
||||
nil,
|
||||
nil,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
|
||||
@@ -77,7 +77,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
|
||||
defer deleteRC(ctx, t, clientSet, violatorRc)
|
||||
waitForRCPodsRunning(ctx, t, clientSet, violatorRc)
|
||||
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, nodes)
|
||||
podEvictor := initPodEvictorOrFail(t, clientSet, getPodsAssignedToNode, nodes)
|
||||
|
||||
// Run TopologySpreadConstraint strategy
|
||||
t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)
|
||||
|
||||
Reference in New Issue
Block a user