From c7524705b3a7c0058576edeae07a3564dbe6d7c6 Mon Sep 17 00:00:00 2001
From: JaneLiuL
Date: Fri, 1 Oct 2021 16:43:13 +0800
Subject: [PATCH] Ignore Pods With Deletion Timestamp

---
 README.md                                     |  1 +
 pkg/descheduler/evictions/evictions.go        |  4 ++
 pkg/descheduler/strategies/failedpods_test.go | 40 ++++++++++++-------
 .../strategies/node_affinity_test.go          | 27 +++++++++----
 .../strategies/pod_antiaffinity_test.go       | 15 +++++++
 .../strategies/pod_lifetime_test.go           | 27 ++++++++++++++
 .../strategies/topologyspreadconstraint.go    |  3 +-
 pkg/utils/pod.go                              |  5 +++
 8 files changed, 99 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index e9a23ca03..8dd6438ff 100644
--- a/README.md
+++ b/README.md
@@ -749,6 +749,7 @@ best effort pods are evicted before burstable and guaranteed pods.
 * All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
   annotation is used to override checks which prevent eviction and users can select which pod is evicted.
   Users should know how and if the pod will be recreated.
+* Pods with a non-nil DeletionTimestamp are not evicted by default.
 
 Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.
 
diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go
index b156618f9..c829ef734 100644
--- a/pkg/descheduler/evictions/evictions.go
+++ b/pkg/descheduler/evictions/evictions.go
@@ -290,6 +290,10 @@ func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
 		checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod"))
 	}
 
+	if utils.IsPodTerminating(pod) {
+		checkErrs = append(checkErrs, fmt.Errorf("pod is terminating"))
+	}
+
 	for _, c := range ev.constraints {
 		if err := c(pod); err != nil {
 			checkErrs = append(checkErrs, err)
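The evictions.go guard above is the core of the change: IsEvictable gathers every reason a pod must be kept, and a pod that is already terminating now contributes one more. The predicate itself boils down to a nil check on DeletionTimestamp; a minimal self-contained sketch of the same logic (the wiring below is illustrative, not the descheduler's actual code):

    package main

    import (
    	"fmt"

    	v1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // isPodTerminating mirrors the new utils.IsPodTerminating helper:
    // a non-nil DeletionTimestamp means deletion is already in flight.
    func isPodTerminating(pod *v1.Pod) bool {
    	return pod.DeletionTimestamp != nil
    }

    func main() {
    	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p1"}}
    	now := metav1.Now()
    	pod.DeletionTimestamp = &now
    	if isPodTerminating(pod) {
    		fmt.Println("skip eviction: pod is terminating")
    	}
    }

Evicting such a pod would only duplicate the deletion already in progress and burn the strategy's eviction budget.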
"CreateContainerConfigError"}, - }), + }, nil), }, }, { @@ -138,7 +138,7 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", nil, &v1.ContainerState{ Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, - }), + }, nil), }, }, { @@ -149,7 +149,7 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", &v1.ContainerState{ Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"}, - }, nil), + }, nil, nil), }, }, { @@ -160,7 +160,7 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", nil, &v1.ContainerState{ Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, - }), + }, nil), }, }, { @@ -173,7 +173,7 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", nil, &v1.ContainerState{ Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, - }), + }, nil), }, }, { @@ -184,7 +184,7 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", &v1.ContainerState{ Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, - }, nil), + }, nil, nil), }, }, { @@ -195,7 +195,18 @@ func TestRemoveFailedPods(t *testing.T) { pods: []v1.Pod{ buildTestPod("p1", "node1", &v1.ContainerState{ Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, - }, nil), + }, nil, nil), + }, + }, + { + description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved", + strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false), + nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, + expectedEvictedPodCount: 0, + pods: []v1.Pod{ + buildTestPod("p1", "node1", &v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"}, + }, nil, &metav1.Time{}), }, }, } @@ -260,7 +271,7 @@ func TestValidRemoveFailedPodsParams(t *testing.T) { } } -func buildTestPod(podName, nodeName string, initContainerState, containerState *v1.ContainerState) v1.Pod { +func buildTestPod(podName, nodeName string, initContainerState, containerState *v1.ContainerState, deletionTimestamp *metav1.Time) v1.Pod { pod := test.BuildTestPod(podName, 1, 1, nodeName, func(p *v1.Pod) { ps := v1.PodStatus{} @@ -278,5 +289,6 @@ func buildTestPod(podName, nodeName string, initContainerState, containerState * }) pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() pod.ObjectMeta.SetCreationTimestamp(metav1.Now()) + pod.DeletionTimestamp = deletionTimestamp return *pod } diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go index 390f5de18..8ec2ea517 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ b/pkg/descheduler/strategies/node_affinity_test.go @@ -22,6 +22,7 @@ import ( v1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" @@ -62,7 +63,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { unschedulableNodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue unschedulableNodeWithLabels.Spec.Unschedulable = true - addPodsToNode := func(node *v1.Node) []v1.Pod { + addPodsToNode := func(node *v1.Node, deletionTimestamp *metav1.Time) []v1.Pod { podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name, nil) podWithNodeAffinity.Spec.Affinity = 
diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go
index 390f5de18..8ec2ea517 100644
--- a/pkg/descheduler/strategies/node_affinity_test.go
+++ b/pkg/descheduler/strategies/node_affinity_test.go
@@ -22,6 +22,7 @@ import (
 
 	v1 "k8s.io/api/core/v1"
 	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
@@ -62,7 +63,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 	unschedulableNodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
 	unschedulableNodeWithLabels.Spec.Unschedulable = true
 
-	addPodsToNode := func(node *v1.Node) []v1.Pod {
+	addPodsToNode := func(node *v1.Node, deletionTimestamp *metav1.Time) []v1.Pod {
 		podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name, nil)
 		podWithNodeAffinity.Spec.Affinity = &v1.Affinity{
 			NodeAffinity: &v1.NodeAffinity{
@@ -91,6 +92,10 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 		pod1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
 		pod2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
 
+		podWithNodeAffinity.DeletionTimestamp = deletionTimestamp
+		pod1.DeletionTimestamp = deletionTimestamp
+		pod2.DeletionTimestamp = deletionTimestamp
+
 		return []v1.Pod{
 			*podWithNodeAffinity,
 			*pod1,
@@ -117,7 +122,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 				},
 			},
 			expectedEvictedPodCount: 0,
-			pods:                    addPodsToNode(nodeWithoutLabels),
+			pods:                    addPodsToNode(nodeWithoutLabels, nil),
 			nodes:                   []*v1.Node{nodeWithoutLabels, nodeWithLabels},
 			maxPodsToEvictPerNode:   0,
 		},
@@ -125,7 +130,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 			description:             "Pod is correctly scheduled on node, no eviction expected",
 			strategy:                requiredDuringSchedulingIgnoredDuringExecutionStrategy,
 			expectedEvictedPodCount: 0,
-			pods:                    addPodsToNode(nodeWithLabels),
+			pods:                    addPodsToNode(nodeWithLabels, nil),
 			nodes:                   []*v1.Node{nodeWithLabels},
 			maxPodsToEvictPerNode:   0,
 		},
@@ -133,7 +138,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 			description:             "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted",
 			expectedEvictedPodCount: 1,
 			strategy:                requiredDuringSchedulingIgnoredDuringExecutionStrategy,
-			pods:                    addPodsToNode(nodeWithoutLabels),
+			pods:                    addPodsToNode(nodeWithoutLabels, nil),
 			nodes:                   []*v1.Node{nodeWithoutLabels, nodeWithLabels},
 			maxPodsToEvictPerNode:   0,
 		},
@@ -141,7 +146,15 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 			description:             "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, should not be evicted",
 			expectedEvictedPodCount: 1,
 			strategy:                requiredDuringSchedulingIgnoredDuringExecutionStrategy,
-			pods:                    addPodsToNode(nodeWithoutLabels),
+			pods:                    addPodsToNode(nodeWithoutLabels, nil),
+			nodes:                   []*v1.Node{nodeWithoutLabels, nodeWithLabels},
+			maxPodsToEvictPerNode:   1,
+		},
+		{
+			description:             "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, no pod evicted since pods are terminating",
+			expectedEvictedPodCount: 0,
+			strategy:                requiredDuringSchedulingIgnoredDuringExecutionStrategy,
+			pods:                    addPodsToNode(nodeWithoutLabels, &metav1.Time{}),
 			nodes:                   []*v1.Node{nodeWithoutLabels, nodeWithLabels},
 			maxPodsToEvictPerNode:   1,
 		},
@@ -149,7 +162,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 			description:             "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict",
 			expectedEvictedPodCount: 0,
 			strategy:                requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
-			pods:                    addPodsToNode(nodeWithoutLabels),
+			pods:                    addPodsToNode(nodeWithoutLabels, nil),
 			nodes:                   []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels},
 			maxPodsToEvictPerNode:   0,
 		},
@@ -157,7 +170,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
 			description:             "Pod is scheduled on node without matching labels, and node where pod fits is available, should evict",
 			expectedEvictedPodCount: 0,
 			strategy:                requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
-			pods:                    addPodsToNode(nodeWithoutLabels),
+			pods:                    addPodsToNode(nodeWithoutLabels, nil),
 			nodes:                   []*v1.Node{nodeWithLabels, unschedulableNodeWithLabels},
 			maxPodsToEvictPerNode:   1,
 		},
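Note that the new node-affinity case passes &metav1.Time{}, the zero value, rather than a wall-clock time. Because the eviction check only asks whether the pointer is nil, both forms mark a pod as terminating:

    var ts *metav1.Time                  // nil: pod is live and evictable
    ts = &metav1.Time{}                  // zero value, non-nil: terminating
    ts = &metav1.Time{Time: time.Now()}  // real timestamp: also terminating

Only nil versus non-nil matters to the check; the instant recorded inside the timestamp is irrelevant here.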
diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go
index ab2f018a7..f0625d77b 100644
--- a/pkg/descheduler/strategies/pod_antiaffinity_test.go
+++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go
@@ -55,6 +55,10 @@ func TestPodAntiAffinity(t *testing.T) {
 	p6 := test.BuildTestPod("p6", 100, 0, node1.Name, nil)
 	p7 := test.BuildTestPod("p7", 100, 0, node1.Name, nil)
 	p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
+	p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
+	p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
+	p9.DeletionTimestamp = &metav1.Time{}
+	p10.DeletionTimestamp = &metav1.Time{}
 
 	criticalPriority := utils.SystemCriticalPriority
 	nonEvictablePod := test.BuildTestPod("non-evict", 100, 0, node1.Name, func(pod *v1.Pod) {
@@ -72,6 +76,8 @@ func TestPodAntiAffinity(t *testing.T) {
 	test.SetNormalOwnerRef(p5)
 	test.SetNormalOwnerRef(p6)
 	test.SetNormalOwnerRef(p7)
+	test.SetNormalOwnerRef(p9)
+	test.SetNormalOwnerRef(p10)
 
 	// set pod anti affinity
 	setPodAntiAffinity(p1, "foo", "bar")
@@ -80,6 +86,8 @@ func TestPodAntiAffinity(t *testing.T) {
 	setPodAntiAffinity(p5, "foo1", "bar1")
 	setPodAntiAffinity(p6, "foo1", "bar1")
 	setPodAntiAffinity(p7, "foo", "bar")
+	setPodAntiAffinity(p9, "foo", "bar")
+	setPodAntiAffinity(p10, "foo", "bar")
 
 	// set pod priority
 	test.SetPodPriority(p5, 100)
@@ -150,6 +158,13 @@ func TestPodAntiAffinity(t *testing.T) {
 			expectedEvictedPodCount: 0,
 			nodeFit:                 true,
 		},
+		{
+			description:             "No pod evicted since all pods are terminating",
+			maxPodsToEvictPerNode:   0,
+			pods:                    []v1.Pod{*p9, *p10},
+			nodes:                   []*v1.Node{node1},
+			expectedEvictedPodCount: 0,
+		},
 	}
 
 	for _, test := range tests {
diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go
index a4b2f165f..02198ffa7 100644
--- a/pkg/descheduler/strategies/pod_lifetime_test.go
+++ b/pkg/descheduler/strategies/pod_lifetime_test.go
@@ -125,6 +125,17 @@ func TestPodLifeTime(t *testing.T) {
 	p12.ObjectMeta.OwnerReferences = ownerRef1
 	p13.ObjectMeta.OwnerReferences = ownerRef1
 
+	p14 := test.BuildTestPod("p14", 100, 0, node1.Name, nil)
+	p15 := test.BuildTestPod("p15", 100, 0, node1.Name, nil)
+	p14.Namespace = "dev"
+	p15.Namespace = "dev"
+	p14.ObjectMeta.CreationTimestamp = olderPodCreationTime
+	p15.ObjectMeta.CreationTimestamp = olderPodCreationTime
+	p14.ObjectMeta.OwnerReferences = ownerRef1
+	p15.ObjectMeta.OwnerReferences = ownerRef1
+	p14.DeletionTimestamp = &metav1.Time{}
+	p15.DeletionTimestamp = &metav1.Time{}
+
 	var maxLifeTime uint = 600
 	testCases := []struct {
 		description string
@@ -246,10 +257,26 @@ func TestPodLifeTime(t *testing.T) {
 			nodes:                   []*v1.Node{node1},
 			expectedEvictedPodCount: 1,
 		},
+		{
+			description: "No pod should be evicted since all pods are terminating",
+			strategy: api.DeschedulerStrategy{
+				Enabled: true,
+				Params: &api.StrategyParameters{
+					PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime},
+					LabelSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"foo": "bar"},
+					},
+				},
+			},
+			maxPodsToEvictPerNode:   5,
+			pods:                    []v1.Pod{*p14, *p15},
+			nodes:                   []*v1.Node{node1},
+			expectedEvictedPodCount: 0,
+		},
 	}
 
 	for _, tc := range testCases {
 		fakeClient := &fake.Clientset{}
 		fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
 			return true, &v1.PodList{Items: tc.pods}, nil
 		})
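Each strategy test drives a client-go fake.Clientset, as in the AddReactor call above, and then compares the observed evictions against expectedEvictedPodCount. One way to tally them from the fake's recorded actions (a sketch; the repo's actual assertion helper may differ):

    evicted := 0
    for _, action := range fakeClient.Actions() {
    	// The eviction API is recorded as a "create" on the pods/eviction subresource.
    	if action.GetVerb() == "create" && action.GetSubresource() == "eviction" {
    		evicted++
    	}
    }

Since p14 and p15 fail IsEvictable, the new case expects zero such actions.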
diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go
index f51d97ea4..6b56a52b0 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint.go
@@ -141,10 +141,9 @@ func RemovePodsViolatingTopologySpreadConstraint(
 		var sumPods float64
 		for i := range namespacePods.Items {
 			// skip pods that are being deleted.
-			if namespacePods.Items[i].DeletionTimestamp != nil {
+			if utils.IsPodTerminating(&namespacePods.Items[i]) {
 				continue
 			}
-
 			// 4. if the pod matches this TopologySpreadConstraint LabelSelector
 			if !selector.Matches(labels.Set(namespacePods.Items[i].Labels)) {
 				continue
diff --git a/pkg/utils/pod.go b/pkg/utils/pod.go
index 288607d07..c252888ea 100644
--- a/pkg/utils/pod.go
+++ b/pkg/utils/pod.go
@@ -89,6 +89,11 @@ func IsMirrorPod(pod *v1.Pod) bool {
 	return ok
 }
 
+// IsPodTerminating returns true if the pod's DeletionTimestamp is set.
+func IsPodTerminating(pod *v1.Pod) bool {
+	return pod.DeletionTimestamp != nil
+}
+
 // IsStaticPod returns true if the pod is a static pod.
 func IsStaticPod(pod *v1.Pod) bool {
 	source, err := GetPodSource(pod)
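The helper is deliberately tiny so that every strategy shares a single definition of "terminating" instead of open-coding the nil check, as topologyspreadconstraint.go previously did. A matching unit test would be equally small (sketch only; the patch itself adds no test for the helper):

    func TestIsPodTerminating(t *testing.T) {
    	pod := &v1.Pod{}
    	if IsPodTerminating(pod) {
    		t.Error("pod without DeletionTimestamp reported as terminating")
    	}
    	now := metav1.Now()
    	pod.DeletionTimestamp = &now
    	if !IsPodTerminating(pod) {
    		t.Error("pod with DeletionTimestamp not reported as terminating")
    	}
    }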