From 9aad51f328a2aafefa3374b98def090e2a3b7445 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Wed, 7 Jun 2023 21:00:47 -0400 Subject: [PATCH 1/2] Revert "Merge pull request #1164 from a7i/podlifetime-CrashLoopBackOff" This reverts commit 699297711a5ac3661e2362b36d37ba8e87d3b32d, reversing changes made to 877d9b18ee5e06485c4fa363a48ec590f2859244. --- pkg/framework/plugins/podlifetime/validation.go | 4 +--- pkg/framework/plugins/podlifetime/validation_test.go | 11 +---------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/pkg/framework/plugins/podlifetime/validation.go b/pkg/framework/plugins/podlifetime/validation.go index 02188f940..973dceb6a 100644 --- a/pkg/framework/plugins/podlifetime/validation.go +++ b/pkg/framework/plugins/podlifetime/validation.go @@ -44,14 +44,12 @@ func ValidatePodLifeTimeArgs(obj runtime.Object) error { } } podLifeTimeAllowedStates := sets.New( - // Pod phase reasons string(v1.PodRunning), string(v1.PodPending), - // Container state reasons + // Container state reasons: https://github.com/kubernetes/kubernetes/blob/release-1.24/pkg/kubelet/kubelet_pods.go#L76-L79 "PodInitializing", "ContainerCreating", - "CrashLoopBackOff", ) if !podLifeTimeAllowedStates.HasAll(args.States...) { diff --git a/pkg/framework/plugins/podlifetime/validation_test.go b/pkg/framework/plugins/podlifetime/validation_test.go index 3dfe280af..b42797427 100644 --- a/pkg/framework/plugins/podlifetime/validation_test.go +++ b/pkg/framework/plugins/podlifetime/validation_test.go @@ -20,7 +20,6 @@ import ( "testing" v1 "k8s.io/api/core/v1" - utilpointer "k8s.io/utils/pointer" ) func TestValidateRemovePodLifeTimeArgs(t *testing.T) { @@ -32,7 +31,7 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) { { description: "valid arg, no errors", args: &PodLifeTimeArgs{ - MaxPodLifeTimeSeconds: utilpointer.Uint(1), + MaxPodLifeTimeSeconds: func(i uint) *uint { return &i }(1), States: []string{string(v1.PodRunning)}, }, expectError: false, @@ -51,14 +50,6 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) { }, expectError: true, }, - { - description: "allows CrashLoopBackOff state", - args: &PodLifeTimeArgs{ - MaxPodLifeTimeSeconds: utilpointer.Uint(1), - States: []string{"CrashLoopBackOff"}, - }, - expectError: false, - }, } for _, tc := range testCases { From 0bdbf51eb22242375a0dd6f29cb6ec6b2cdfe217 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Wed, 7 Jun 2023 21:30:13 -0400 Subject: [PATCH 2/2] Move CrashLoopBackOff container state from PodLifeTime to TooManyRestarts plugin --- README.md | 8 ++ examples/too-many-restarts.yml | 15 ++++ .../defaults.go | 3 + .../defaults_test.go | 4 + .../toomanyrestarts.go | 17 +++++ .../toomanyrestarts_test.go | 63 +++++++++++++++- .../removepodshavingtoomanyrestarts/types.go | 1 + .../validation.go | 14 ++++ .../validation_test.go | 74 +++++++++++++++++++ .../zz_generated.deepcopy.go | 5 ++ 10 files changed, 201 insertions(+), 3 deletions(-) create mode 100644 examples/too-many-restarts.yml create mode 100644 pkg/framework/plugins/removepodshavingtoomanyrestarts/validation_test.go diff --git a/README.md b/README.md index 60d7d93b6..b1febbc69 100644 --- a/README.md +++ b/README.md @@ -561,6 +561,13 @@ include `podRestartThreshold`, which is the number of restarts (summed over all should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored into that calculation. +You can also specify `states` parameter to **only** evict pods matching the following conditions: +- [Pod Phase](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase) status of: `Running` +- [Container State Waiting](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting) of: `CrashLoopBackOff` + +If a value for `states` or `podStatusPhases` is not specified, +Pods in any state (even `Running`) are considered for eviction. + **Parameters:** |Name|Type| @@ -569,6 +576,7 @@ into that calculation. |`includingInitContainers`|bool| |`namespaces`|(see [namespace filtering](#namespace-filtering))| |`labelSelector`|(see [label filtering](#label-filtering))| +|`states`|list(string)|Only supported in v0.28+| **Example:** diff --git a/examples/too-many-restarts.yml b/examples/too-many-restarts.yml new file mode 100644 index 000000000..fe95adfef --- /dev/null +++ b/examples/too-many-restarts.yml @@ -0,0 +1,15 @@ +apiVersion: "descheduler/v1alpha2" +kind: "DeschedulerPolicy" +profiles: + - name: ProfileName + pluginConfig: + - name: "RemovePodsHavingTooManyRestarts" + args: + podRestartThreshold: 100 + includingInitContainers: true + states: + - CrashLoopBackOff + plugins: + deschedule: + enabled: + - "RemovePodsHavingTooManyRestarts" diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults.go index 539f66160..e26394265 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults.go @@ -37,4 +37,7 @@ func SetDefaults_RemovePodsHavingTooManyRestartsArgs(obj runtime.Object) { if !args.IncludingInitContainers { args.IncludingInitContainers = false } + if args.States == nil { + args.States = nil + } } diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults_test.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults_test.go index 128359489..f3c988ed1 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults_test.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/defaults_test.go @@ -17,6 +17,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -37,6 +38,7 @@ func TestSetDefaults_RemovePodsHavingTooManyRestartsArgs(t *testing.T) { LabelSelector: nil, PodRestartThreshold: 0, IncludingInitContainers: false, + States: nil, }, }, { @@ -46,12 +48,14 @@ func TestSetDefaults_RemovePodsHavingTooManyRestartsArgs(t *testing.T) { LabelSelector: &metav1.LabelSelector{}, PodRestartThreshold: 10, IncludingInitContainers: true, + States: []string{string(v1.PodRunning)}, }, want: &RemovePodsHavingTooManyRestartsArgs{ Namespaces: &api.Namespaces{}, LabelSelector: &metav1.LabelSelector{}, PodRestartThreshold: 10, IncludingInitContainers: true, + States: []string{string(v1.PodRunning)}, }, }, } diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go index 4d4299867..4db4c16ab 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts.go @@ -75,6 +75,23 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug return true }) + if len(tooManyRestartsArgs.States) > 0 { + states := sets.New(tooManyRestartsArgs.States...) + podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool { + if states.Has(string(pod.Status.Phase)) { + return true + } + + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) { + return true + } + } + + return false + }) + } + return &RemovePodsHavingTooManyRestarts{ handle: handle, args: tooManyRestartsArgs, diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go index ea99b5f0d..45dfd9d56 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/toomanyrestarts_test.go @@ -104,8 +104,6 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { node4 := test.BuildTestNode("node4", 200, 3000, 10, nil) node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil) - pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil)) - createRemovePodsHavingTooManyRestartsAgrs := func( podRestartThresholds int32, includingInitContainers bool, @@ -126,6 +124,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { maxPodsToEvictPerNode *uint maxNoOfPodsToEvictPerNamespace *uint nodeFit bool + applyFunc func([]*v1.Pod) }{ { description: "All pods have total restarts under threshold, no pod evictions", @@ -191,7 +190,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { maxNoOfPodsToEvictPerNamespace: &uint3, }, { - description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tained, 0 pod evictions", + description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tainted, 0 pod evictions", args: createRemovePodsHavingTooManyRestartsAgrs(1, true), nodes: []*v1.Node{node1, node2}, expectedEvictedPodCount: 0, @@ -222,10 +221,68 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) { maxPodsToEvictPerNode: &uint3, nodeFit: true, }, + { + description: "pods are in CrashLoopBackOff with states=CrashLoopBackOff, 3 pod evictions", + args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}}, + nodes: []*v1.Node{node1, node5}, + expectedEvictedPodCount: 3, + maxPodsToEvictPerNode: &uint3, + nodeFit: true, + applyFunc: func(pods []*v1.Pod) { + for _, pod := range pods { + if len(pod.Status.ContainerStatuses) > 0 { + pod.Status.ContainerStatuses[0].State = v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{Reason: "CrashLoopBackOff"}, + } + } + } + }, + }, + { + description: "pods without CrashLoopBackOff with states=CrashLoopBackOff, 0 pod evictions", + args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}}, + nodes: []*v1.Node{node1, node5}, + expectedEvictedPodCount: 0, + maxPodsToEvictPerNode: &uint3, + nodeFit: true, + }, + { + description: "pods running with state=Running, 3 pod evictions", + args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}}, + nodes: []*v1.Node{node1}, + expectedEvictedPodCount: 3, + maxPodsToEvictPerNode: &uint3, + applyFunc: func(pods []*v1.Pod) { + for _, pod := range pods { + pod.Status.Phase = v1.PodRunning + } + }, + }, + { + description: "pods pending with state=Running, 0 pod evictions", + args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}}, + nodes: []*v1.Node{node1}, + expectedEvictedPodCount: 0, + maxPodsToEvictPerNode: &uint3, + applyFunc: func(pods []*v1.Pod) { + for _, pod := range pods { + pod.Status.Phase = v1.PodPending + } + }, + }, } for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { + pods := append( + initPods(node1), + test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil), + test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil), + ) + if tc.applyFunc != nil { + tc.applyFunc(pods) + } + ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/types.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/types.go index 45c29f252..3a9e51003 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/types.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/types.go @@ -29,4 +29,5 @@ type RemovePodsHavingTooManyRestartsArgs struct { LabelSelector *metav1.LabelSelector `json:"labelSelector"` PodRestartThreshold int32 `json:"podRestartThreshold"` IncludingInitContainers bool `json:"includingInitContainers"` + States []string `json:"states"` } diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation.go index d734f3c74..e1935a43f 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation.go @@ -16,8 +16,10 @@ package removepodshavingtoomanyrestarts import ( "fmt" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" ) // ValidateRemovePodsHavingTooManyRestartsArgs validates RemovePodsHavingTooManyRestarts arguments @@ -38,5 +40,17 @@ func ValidateRemovePodsHavingTooManyRestartsArgs(obj runtime.Object) error { return fmt.Errorf("invalid PodsHavingTooManyRestarts threshold") } + allowedStates := sets.New( + // Pod phases: + string(v1.PodRunning), + + // Container state reasons: + "CrashLoopBackOff", + ) + + if !allowedStates.HasAll(args.States...) { + return fmt.Errorf("states must be one of %v", allowedStates.UnsortedList()) + } + return nil } diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation_test.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation_test.go new file mode 100644 index 000000000..03578f308 --- /dev/null +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/validation_test.go @@ -0,0 +1,74 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package removepodshavingtoomanyrestarts + +import ( + "testing" + + v1 "k8s.io/api/core/v1" +) + +func TestValidateRemovePodsHavingTooManyRestartsArgs(t *testing.T) { + testCases := []struct { + description string + args *RemovePodsHavingTooManyRestartsArgs + expectError bool + }{ + { + description: "valid arg, no errors", + args: &RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: 1, + States: []string{string(v1.PodRunning)}, + }, + expectError: false, + }, + { + description: "invalid PodRestartThreshold arg, expects errors", + args: &RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: 0, + }, + expectError: true, + }, + { + description: "invalid States arg, expects errors", + args: &RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: 1, + States: []string{string(v1.PodFailed)}, + }, + expectError: true, + }, + { + description: "allows CrashLoopBackOff state", + args: &RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: 1, + States: []string{"CrashLoopBackOff"}, + }, + expectError: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := ValidateRemovePodsHavingTooManyRestartsArgs(tc.args) + + hasError := err != nil + if tc.expectError != hasError { + t.Error("unexpected arg validation behavior") + } + }) + } +} diff --git a/pkg/framework/plugins/removepodshavingtoomanyrestarts/zz_generated.deepcopy.go b/pkg/framework/plugins/removepodshavingtoomanyrestarts/zz_generated.deepcopy.go index 0971574ee..b00804908 100644 --- a/pkg/framework/plugins/removepodshavingtoomanyrestarts/zz_generated.deepcopy.go +++ b/pkg/framework/plugins/removepodshavingtoomanyrestarts/zz_generated.deepcopy.go @@ -41,6 +41,11 @@ func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyInto(out *RemovePodsHavin *out = new(v1.LabelSelector) (*in).DeepCopyInto(*out) } + if in.States != nil { + in, out := &in.States, &out.States + *out = make([]string, len(*in)) + copy(*out, *in) + } return }