1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #1165 from a7i/toomanyrestarts-CrashLoopBackOff

TooManyRestart: state filter for CrashLoopBackOff
This commit is contained in:
Kubernetes Prow Robot
2023-06-13 09:47:59 -07:00
committed by GitHub
12 changed files with 203 additions and 16 deletions

View File

@@ -561,6 +561,13 @@ include `podRestartThreshold`, which is the number of restarts (summed over all
should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored
into that calculation.
You can also specify the `states` parameter to **only** evict pods matching the following conditions:
- [Pod Phase](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase) status of: `Running`
- [Container State Waiting](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting) of: `CrashLoopBackOff`
If a value for `states` is not specified,
Pods in any state (even `Running`) are considered for eviction.
**Parameters:**
|Name|Type|
@@ -569,6 +576,7 @@ into that calculation.
|`includingInitContainers`|bool|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`states`|list(string) (only supported in v0.28+)|
**Example:**

View File

@@ -0,0 +1,15 @@
# Example policy: evict pods that have restarted too many times, restricted to
# pods with a container waiting in CrashLoopBackOff.
apiVersion: "descheduler/v1alpha2"
kind: "DeschedulerPolicy"
profiles:
  - name: ProfileName
    pluginConfig:
    - name: "RemovePodsHavingTooManyRestarts"
      args:
        # Number of restarts at which a pod should be evicted.
        podRestartThreshold: 100
        # Factor init container restarts into the restart count.
        includingInitContainers: true
        # Only evict pods matching one of the listed states.
        states:
        - CrashLoopBackOff
    plugins:
      deschedule:
        enabled:
          - "RemovePodsHavingTooManyRestarts"

View File

@@ -44,14 +44,12 @@ func ValidatePodLifeTimeArgs(obj runtime.Object) error {
}
}
podLifeTimeAllowedStates := sets.New(
// Pod phase reasons
string(v1.PodRunning),
string(v1.PodPending),
// Container state reasons
// Container state reasons: https://github.com/kubernetes/kubernetes/blob/release-1.24/pkg/kubelet/kubelet_pods.go#L76-L79
"PodInitializing",
"ContainerCreating",
"CrashLoopBackOff",
)
if !podLifeTimeAllowedStates.HasAll(args.States...) {

View File

@@ -20,7 +20,6 @@ import (
"testing"
v1 "k8s.io/api/core/v1"
utilpointer "k8s.io/utils/pointer"
)
func TestValidateRemovePodLifeTimeArgs(t *testing.T) {
@@ -32,7 +31,7 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) {
{
description: "valid arg, no errors",
args: &PodLifeTimeArgs{
MaxPodLifeTimeSeconds: utilpointer.Uint(1),
MaxPodLifeTimeSeconds: func(i uint) *uint { return &i }(1),
States: []string{string(v1.PodRunning)},
},
expectError: false,
@@ -51,14 +50,6 @@ func TestValidateRemovePodLifeTimeArgs(t *testing.T) {
},
expectError: true,
},
{
description: "allows CrashLoopBackOff state",
args: &PodLifeTimeArgs{
MaxPodLifeTimeSeconds: utilpointer.Uint(1),
States: []string{"CrashLoopBackOff"},
},
expectError: false,
},
}
for _, tc := range testCases {

View File

@@ -37,4 +37,7 @@ func SetDefaults_RemovePodsHavingTooManyRestartsArgs(obj runtime.Object) {
if !args.IncludingInitContainers {
args.IncludingInitContainers = false
}
if args.States == nil {
args.States = nil
}
}

View File

@@ -17,6 +17,7 @@ import (
"testing"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -37,6 +38,7 @@ func TestSetDefaults_RemovePodsHavingTooManyRestartsArgs(t *testing.T) {
LabelSelector: nil,
PodRestartThreshold: 0,
IncludingInitContainers: false,
States: nil,
},
},
{
@@ -46,12 +48,14 @@ func TestSetDefaults_RemovePodsHavingTooManyRestartsArgs(t *testing.T) {
LabelSelector: &metav1.LabelSelector{},
PodRestartThreshold: 10,
IncludingInitContainers: true,
States: []string{string(v1.PodRunning)},
},
want: &RemovePodsHavingTooManyRestartsArgs{
Namespaces: &api.Namespaces{},
LabelSelector: &metav1.LabelSelector{},
PodRestartThreshold: 10,
IncludingInitContainers: true,
States: []string{string(v1.PodRunning)},
},
},
}

View File

@@ -75,6 +75,23 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
return true
})
if len(tooManyRestartsArgs.States) > 0 {
states := sets.New(tooManyRestartsArgs.States...)
podFilter = podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
if states.Has(string(pod.Status.Phase)) {
return true
}
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.State.Waiting != nil && states.Has(containerStatus.State.Waiting.Reason) {
return true
}
}
return false
})
}
return &RemovePodsHavingTooManyRestarts{
handle: handle,
args: tooManyRestartsArgs,

View File

@@ -104,8 +104,6 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
node4 := test.BuildTestNode("node4", 200, 3000, 10, nil)
node5 := test.BuildTestNode("node5", 2000, 3000, 10, nil)
pods := append(append(initPods(node1), test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil)), test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil))
createRemovePodsHavingTooManyRestartsAgrs := func(
podRestartThresholds int32,
includingInitContainers bool,
@@ -126,6 +124,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxPodsToEvictPerNode *uint
maxNoOfPodsToEvictPerNamespace *uint
nodeFit bool
applyFunc func([]*v1.Pod)
}{
{
description: "All pods have total restarts under threshold, no pod evictions",
@@ -191,7 +190,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxNoOfPodsToEvictPerNamespace: &uint3,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tained, 0 pod evictions",
description: "All pods have total restarts equals threshold(maxPodsToEvictPerNode=3) but the only other node is tainted, 0 pod evictions",
args: createRemovePodsHavingTooManyRestartsAgrs(1, true),
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
@@ -222,10 +221,68 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
},
{
description: "pods are in CrashLoopBackOff with states=CrashLoopBackOff, 3 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
if len(pod.Status.ContainerStatuses) > 0 {
pod.Status.ContainerStatuses[0].State = v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
}
}
}
},
},
{
description: "pods without CrashLoopBackOff with states=CrashLoopBackOff, 0 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{"CrashLoopBackOff"}},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
nodeFit: true,
},
{
description: "pods running with state=Running, 3 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 3,
maxPodsToEvictPerNode: &uint3,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
pod.Status.Phase = v1.PodRunning
}
},
},
{
description: "pods pending with state=Running, 0 pod evictions",
args: RemovePodsHavingTooManyRestartsArgs{PodRestartThreshold: 1, States: []string{string(v1.PodRunning)}},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 0,
maxPodsToEvictPerNode: &uint3,
applyFunc: func(pods []*v1.Pod) {
for _, pod := range pods {
pod.Status.Phase = v1.PodPending
}
},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
pods := append(
initPods(node1),
test.BuildTestPod("CPU-consumer-1", 150, 100, node4.Name, nil),
test.BuildTestPod("CPU-consumer-2", 150, 100, node5.Name, nil),
)
if tc.applyFunc != nil {
tc.applyFunc(pods)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

View File

@@ -29,4 +29,5 @@ type RemovePodsHavingTooManyRestartsArgs struct {
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
PodRestartThreshold int32 `json:"podRestartThreshold"`
IncludingInitContainers bool `json:"includingInitContainers"`
States []string `json:"states"`
}

View File

@@ -16,8 +16,10 @@ package removepodshavingtoomanyrestarts
import (
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
)
// ValidateRemovePodsHavingTooManyRestartsArgs validates RemovePodsHavingTooManyRestarts arguments
@@ -38,5 +40,17 @@ func ValidateRemovePodsHavingTooManyRestartsArgs(obj runtime.Object) error {
return fmt.Errorf("invalid PodsHavingTooManyRestarts threshold")
}
allowedStates := sets.New(
// Pod phases:
string(v1.PodRunning),
// Container state reasons:
"CrashLoopBackOff",
)
if !allowedStates.HasAll(args.States...) {
return fmt.Errorf("states must be one of %v", allowedStates.UnsortedList())
}
return nil
}

View File

@@ -0,0 +1,74 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package removepodshavingtoomanyrestarts
import (
"testing"
v1 "k8s.io/api/core/v1"
)
// TestValidateRemovePodsHavingTooManyRestartsArgs runs
// ValidateRemovePodsHavingTooManyRestartsArgs against a table of arguments and
// checks only whether an error is returned, not its content. The cases expect
// a restart threshold of 1 with the Running phase or the CrashLoopBackOff
// state to be accepted, and a threshold of 0 or the Failed phase to be
// rejected.
func TestValidateRemovePodsHavingTooManyRestartsArgs(t *testing.T) {
	testCases := []struct {
		description string
		args        *RemovePodsHavingTooManyRestartsArgs
		expectError bool
	}{
		{
			description: "valid arg, no errors",
			args: &RemovePodsHavingTooManyRestartsArgs{
				PodRestartThreshold: 1,
				States:              []string{string(v1.PodRunning)},
			},
			expectError: false,
		},
		{
			description: "invalid PodRestartThreshold arg, expects errors",
			args: &RemovePodsHavingTooManyRestartsArgs{
				PodRestartThreshold: 0,
			},
			expectError: true,
		},
		{
			description: "invalid States arg, expects errors",
			args: &RemovePodsHavingTooManyRestartsArgs{
				PodRestartThreshold: 1,
				States:              []string{string(v1.PodFailed)},
			},
			expectError: true,
		},
		{
			description: "allows CrashLoopBackOff state",
			args: &RemovePodsHavingTooManyRestartsArgs{
				PodRestartThreshold: 1,
				States:              []string{"CrashLoopBackOff"},
			},
			expectError: false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			err := ValidateRemovePodsHavingTooManyRestartsArgs(tc.args)
			// Report both the expectation and the actual error so a failing
			// case can be diagnosed from the test output alone.
			if hasError := err != nil; hasError != tc.expectError {
				t.Errorf("unexpected arg validation behavior: expectError=%v, got error: %v", tc.expectError, err)
			}
		})
	}
}

View File

@@ -41,6 +41,11 @@ func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyInto(out *RemovePodsHavin
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.States != nil {
in, out := &in.States, &out.States
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}