From c1a63a557a5ee0c06e922d315fc49aacf36f3a0b Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Mon, 11 Jan 2021 11:19:02 -0500 Subject: [PATCH] Add option to ignore pods with PVCs from eviction --- README.md | 5 ++- pkg/api/types.go | 3 ++ pkg/api/v1alpha1/types.go | 3 ++ pkg/api/v1alpha1/zz_generated.conversion.go | 2 + pkg/api/v1alpha1/zz_generated.deepcopy.go | 5 +++ pkg/api/zz_generated.deepcopy.go | 5 +++ pkg/apis/componentconfig/types.go | 3 ++ pkg/apis/componentconfig/v1alpha1/types.go | 3 ++ .../v1alpha1/zz_generated.conversion.go | 2 + pkg/descheduler/descheduler.go | 6 +++ pkg/descheduler/evictions/evictions.go | 20 ++++++++++ pkg/descheduler/strategies/duplicates_test.go | 2 + .../strategies/lownodeutilization_test.go | 2 + .../strategies/node_affinity_test.go | 1 + pkg/descheduler/strategies/node_taint_test.go | 1 + .../strategies/pod_antiaffinity_test.go | 1 + .../strategies/pod_lifetime_test.go | 40 +++++++++++++++++++ .../strategies/toomanyrestarts_test.go | 1 + .../topologyspreadconstraint_test.go | 1 + test/e2e/e2e_test.go | 2 + 20 files changed, 107 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 982fa1f7d..d72981a55 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,7 @@ parameters associated with the strategies can be configured too. By default, all The policy also includes common configuration for all the strategies: - `nodeSelector` - limiting the nodes which are processed - `evictLocalStoragePods` - allowing to evict pods with local storage +- `ignorePvcPods` - set whether PVC pods should be evicted or ignored (defaults to `false`) - `maxNoOfPodsToEvictPerNode` - maximum number of pods evicted from each node (summed through all strategies) ```yaml @@ -119,6 +120,7 @@ kind: "DeschedulerPolicy" nodeSelector: prod=dev evictLocalStoragePods: true maxNoOfPodsToEvictPerNode: 40 +ignorePvcPods: false strategies: ... ``` @@ -497,9 +499,10 @@ When the descheduler decides to evict pods from a node, it employs the following never evicted because these pods won't be recreated. * Pods associated with DaemonSets are never evicted. * Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set) +* Pods with PVCs are evicted unless `ignorePvcPods: true` is set. * In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority, best effort pods are evicted before burstable and guaranteed pods. -* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This +* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This annotation is used to override checks which prevent eviction and users can select which pod is evicted. Users should know how and if the pod will be recreated. diff --git a/pkg/api/types.go b/pkg/api/types.go index be899ebb5..925ab21db 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -35,6 +35,9 @@ type DeschedulerPolicy struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods *bool + // IgnorePVCPods prevents pods with PVCs from being evicted. + IgnorePVCPods *bool + // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. MaxNoOfPodsToEvictPerNode *int } diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index f5717b33d..a4affb919 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -35,6 +35,9 @@ type DeschedulerPolicy struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"` + // IgnorePVCPods prevents pods with PVCs from being evicted. + IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"` + // MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node. MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"` } diff --git a/pkg/api/v1alpha1/zz_generated.conversion.go b/pkg/api/v1alpha1/zz_generated.conversion.go index e94a9cf4d..3b0b8f919 100644 --- a/pkg/api/v1alpha1/zz_generated.conversion.go +++ b/pkg/api/v1alpha1/zz_generated.conversion.go @@ -122,6 +122,7 @@ func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Desched out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies)) out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector)) out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods)) + out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods)) out.MaxNoOfPodsToEvictPerNode = (*int)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode)) return nil } @@ -135,6 +136,7 @@ func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.Des out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies)) out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector)) out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods)) + out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods)) out.MaxNoOfPodsToEvictPerNode = (*int)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode)) return nil } diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 608cd3503..96f82b08a 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -45,6 +45,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(bool) **out = **in } + if in.IgnorePVCPods != nil { + in, out := &in.IgnorePVCPods, &out.IgnorePVCPods + *out = new(bool) + **out = **in + } if in.MaxNoOfPodsToEvictPerNode != nil { in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode *out = new(int) diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index 8302fb6f1..4aeb27eec 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -45,6 +45,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(bool) **out = **in } + if in.IgnorePVCPods != nil { + in, out := &in.IgnorePVCPods, &out.IgnorePVCPods + *out = new(bool) + **out = **in + } if in.MaxNoOfPodsToEvictPerNode != nil { in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode *out = new(int) diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 02b5915cb..d5aa919b9 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -50,6 +50,9 @@ type DeschedulerConfiguration struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods bool + // IgnorePVCPods sets whether PVC pods should be allowed to be evicted + IgnorePVCPods bool + // Logging specifies the options of logging. // Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information. Logging componentbaseconfig.LoggingConfiguration diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index ea21ea58e..279f7f819 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -50,6 +50,9 @@ type DeschedulerConfiguration struct { // EvictLocalStoragePods allows pods using local storage to be evicted. EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"` + // IgnorePVCPods sets whether PVC pods should be allowed to be evicted + IgnorePVCPods bool `json:"ignorePvcPods,omitempty"` + // Logging specifies the options of logging. // Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information. Logging componentbaseconfig.LoggingConfiguration `json:"logging,omitempty"` diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index c6a6c2641..8bb810eaa 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -56,6 +56,7 @@ func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_Deschedule out.NodeSelector = in.NodeSelector out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode out.EvictLocalStoragePods = in.EvictLocalStoragePods + out.IgnorePVCPods = in.IgnorePVCPods out.Logging = in.Logging return nil } @@ -73,6 +74,7 @@ func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_Deschedule out.NodeSelector = in.NodeSelector out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode out.EvictLocalStoragePods = in.EvictLocalStoragePods + out.IgnorePVCPods = in.IgnorePVCPods out.Logging = in.Logging return nil } diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 097be18ee..0d02e3aed 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -90,6 +90,11 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods } + ignorePvcPods := false + if deschedulerPolicy.IgnorePVCPods != nil { + ignorePvcPods = *deschedulerPolicy.IgnorePVCPods + } + maxNoOfPodsToEvictPerNode := rs.MaxNoOfPodsToEvictPerNode if deschedulerPolicy.MaxNoOfPodsToEvictPerNode != nil { maxNoOfPodsToEvictPerNode = *deschedulerPolicy.MaxNoOfPodsToEvictPerNode @@ -116,6 +121,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer maxNoOfPodsToEvictPerNode, nodes, evictLocalStoragePods, + ignorePvcPods, ) for name, f := range strategyFuncs { diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 4f33de1d4..124b5a909 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -51,6 +51,7 @@ type PodEvictor struct { maxPodsToEvictPerNode int nodepodCount nodePodEvictedCount evictLocalStoragePods bool + ignorePvcPods bool } func NewPodEvictor( @@ -60,6 +61,7 @@ func NewPodEvictor( maxPodsToEvictPerNode int, nodes []*v1.Node, evictLocalStoragePods bool, + ignorePvcPods bool, ) *PodEvictor { var nodePodCount = make(nodePodEvictedCount) for _, node := range nodes { @@ -74,6 +76,7 @@ func NewPodEvictor( maxPodsToEvictPerNode: maxPodsToEvictPerNode, nodepodCount: nodePodCount, evictLocalStoragePods: evictLocalStoragePods, + ignorePvcPods: ignorePvcPods, } } @@ -189,6 +192,14 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable { return nil }) } + if pe.ignorePvcPods { + ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { + if IsPodWithPVC(pod) { + return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods") + } + return nil + }) + } if options.priority != nil { ev.constraints = append(ev.constraints, func(pod *v1.Pod) error { if IsPodEvictableBasedOnPriority(pod, *options.priority) { @@ -267,6 +278,15 @@ func IsPodWithLocalStorage(pod *v1.Pod) bool { return false } +func IsPodWithPVC(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil { + return true + } + } + return false +} + // IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec. func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool { return pod.Spec.Priority == nil || *pod.Spec.Priority < priority diff --git a/pkg/descheduler/strategies/duplicates_test.go b/pkg/descheduler/strategies/duplicates_test.go index 0b9060009..8954feefb 100644 --- a/pkg/descheduler/strategies/duplicates_test.go +++ b/pkg/descheduler/strategies/duplicates_test.go @@ -210,6 +210,7 @@ func TestFindDuplicatePods(t *testing.T) { testCase.maxPodsToEvictPerNode, []*v1.Node{node1, node2}, false, + false, ) RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node1, node2}, podEvictor) @@ -405,6 +406,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) { testCase.maxPodsToEvictPerNode, testCase.nodes, false, + false, ) RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/lownodeutilization_test.go b/pkg/descheduler/strategies/lownodeutilization_test.go index b14511d98..851afcc0b 100644 --- a/pkg/descheduler/strategies/lownodeutilization_test.go +++ b/pkg/descheduler/strategies/lownodeutilization_test.go @@ -444,6 +444,7 @@ func TestLowNodeUtilization(t *testing.T) { test.maxPodsToEvictPerNode, nodes, false, + false, ) strategy := api.DeschedulerStrategy{ @@ -764,6 +765,7 @@ func TestWithTaints(t *testing.T) { item.evictionsExpected, item.nodes, false, + false, ) LowNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go index b728ff039..ceb4772ab 100644 --- a/pkg/descheduler/strategies/node_affinity_test.go +++ b/pkg/descheduler/strategies/node_affinity_test.go @@ -158,6 +158,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) { tc.maxPodsToEvictPerNode, tc.nodes, false, + false, ) RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/node_taint_test.go b/pkg/descheduler/strategies/node_taint_test.go index 0156c6a3a..c9515701e 100644 --- a/pkg/descheduler/strategies/node_taint_test.go +++ b/pkg/descheduler/strategies/node_taint_test.go @@ -171,6 +171,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) { tc.maxPodsToEvictPerNode, tc.nodes, tc.evictLocalStoragePods, + false, ) RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, podEvictor) diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go index 60c8a0537..5c9970a5e 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity_test.go +++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go @@ -120,6 +120,7 @@ func TestPodAntiAffinity(t *testing.T) { test.maxPodsToEvictPerNode, []*v1.Node{node}, false, + false, ) RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, podEvictor) diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go index 41e361b2c..77ed903d2 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -98,6 +98,19 @@ func TestPodLifeTime(t *testing.T) { p9.ObjectMeta.OwnerReferences = ownerRef1 p10.ObjectMeta.OwnerReferences = ownerRef1 + p11 := test.BuildTestPod("p11", 100, 0, node.Name, func(pod *v1.Pod) { + pod.Spec.Volumes = []v1.Volume{ + { + Name: "pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ClaimName: "foo"}, + }, + }, + } + pod.Namespace = "dev" + pod.ObjectMeta.CreationTimestamp = olderPodCreationTime + pod.ObjectMeta.OwnerReferences = ownerRef1 + }) + var maxLifeTime uint = 600 testCases := []struct { description string @@ -105,6 +118,7 @@ func TestPodLifeTime(t *testing.T) { maxPodsToEvictPerNode int pods []v1.Pod expectedEvictedPodCount int + ignorePvcPods bool }{ { description: "Two pods in the `dev` Namespace, 1 is new and 1 very is old. 1 should be evicted.", @@ -169,6 +183,31 @@ func TestPodLifeTime(t *testing.T) { pods: []v1.Pod{*p9, *p10}, expectedEvictedPodCount: 1, }, + { + description: "does not evict pvc pods with ignorePvcPods set to true", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: &api.StrategyParameters{ + PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, + }, + }, + maxPodsToEvictPerNode: 5, + pods: []v1.Pod{*p11}, + expectedEvictedPodCount: 0, + ignorePvcPods: true, + }, + { + description: "evicts pvc pods with ignorePvcPods set to false (or unset)", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: &api.StrategyParameters{ + PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, + }, + }, + maxPodsToEvictPerNode: 5, + pods: []v1.Pod{*p11}, + expectedEvictedPodCount: 1, + }, } for _, tc := range testCases { @@ -186,6 +225,7 @@ func TestPodLifeTime(t *testing.T) { tc.maxPodsToEvictPerNode, []*v1.Node{node}, false, + tc.ignorePvcPods, ) PodLifeTime(ctx, fakeClient, tc.strategy, []*v1.Node{node}, podEvictor) diff --git a/pkg/descheduler/strategies/toomanyrestarts_test.go b/pkg/descheduler/strategies/toomanyrestarts_test.go index 1a9fc0fc6..4f038ae9e 100644 --- a/pkg/descheduler/strategies/toomanyrestarts_test.go +++ b/pkg/descheduler/strategies/toomanyrestarts_test.go @@ -172,6 +172,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { tc.maxPodsToEvictPerNode, []*v1.Node{node}, false, + false, ) RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, []*v1.Node{node}, podEvictor) diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index c0602fc23..6f6c7ec92 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -368,6 +368,7 @@ func TestTopologySpreadConstraint(t *testing.T) { 100, tc.nodes, false, + false, ) RemovePodsViolatingTopologySpreadConstraint(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor) podsEvicted := podEvictor.TotalEvicted() diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index f06f3d02c..a0de14c52 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -220,6 +220,7 @@ func runPodLifetimeStrategy(ctx context.Context, clientset clientset.Interface, 0, nodes, false, + false, ), ) } @@ -648,6 +649,7 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface, 0, nodeList, true, + false, ) for _, node := range nodeList { // Skip the Master Node