1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #1381 from yuanchen8911/exitcode

Add an extra parameter ExitCode to RemoveFailedPods
This commit is contained in:
Kubernetes Prow Robot
2024-05-02 20:27:52 -07:00
committed by GitHub
5 changed files with 139 additions and 22 deletions

View File

@@ -207,7 +207,7 @@ Balance Plugins: These plugins process all pods, or groups of pods, and determin
| [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint) |Balance|Evicts pods violating TopologySpreadConstraints|
| [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts) |Deschedule|Evicts pods having too many restarts|
| [PodLifeTime](#podlifetime) |Deschedule|Evicts pods that have exceeded a specified age limit|
| [RemoveFailedPods](#removefailedpods) |Deschedule|Evicts pods with certain failed reasons|
| [RemoveFailedPods](#removefailedpods) |Deschedule|Evicts pods with certain failed reasons and exit codes|
### RemoveDuplicates
@@ -698,10 +698,8 @@ profiles:
```
### RemoveFailedPods
This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can provide optional parameters to filter by failed pods' and containters' `reasons`. and `exitCodes`. `exitCodes` apply to failed pods' containers with `terminated` state only. `reasons` and `exitCodes` can be expanded to include those of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
@@ -713,6 +711,7 @@ has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considere
|`minPodLifetimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`exitCodes`|list(int32)|
|`includingInitContainers`|bool|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
@@ -729,6 +728,8 @@ profiles:
args:
reasons:
- "NodeAffinity"
exitCodes:
- 1
includingInitContainers: true
excludeOwnerKinds:
- "Job"

View File

@@ -148,6 +148,17 @@ func validateCanEvict(pod *v1.Pod, failedPodArgs *RemoveFailedPodsArgs) error {
}
}
if len(failedPodArgs.ExitCodes) > 0 {
exitCodes := getFailedContainerStatusExitCodes(pod.Status.ContainerStatuses)
if failedPodArgs.IncludingInitContainers {
exitCodes = append(exitCodes, getFailedContainerStatusExitCodes(pod.Status.InitContainerStatuses)...)
}
if !sets.New(failedPodArgs.ExitCodes...).HasAny(exitCodes...) {
errs = append(errs, fmt.Errorf("pod does not match any of the exitCodes"))
}
}
return utilerrors.NewAggregate(errs)
}
@@ -165,3 +176,15 @@ func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []s
return reasons
}
func getFailedContainerStatusExitCodes(containerStatuses []v1.ContainerStatus) []int32 {
exitCodes := make([]int32, 0)
for _, containerStatus := range containerStatuses {
if containerStatus.State.Terminated != nil {
exitCodes = append(exitCodes, containerStatus.State.Terminated.ExitCode)
}
}
return exitCodes
}

View File

@@ -41,12 +41,15 @@ var OneHourInSeconds uint = 3600
func TestRemoveFailedPods(t *testing.T) {
createRemoveFailedPodsArgs := func(
includingInitContainers bool,
reasons, excludeKinds []string,
reasons []string,
exitCodes []int32,
excludeKinds []string,
minAgeSeconds *uint,
) RemoveFailedPodsArgs {
return RemoveFailedPodsArgs{
IncludingInitContainers: includingInitContainers,
Reasons: reasons,
ExitCodes: exitCodes,
MinPodLifetimeSeconds: minAgeSeconds,
ExcludeOwnerKinds: excludeKinds,
}
@@ -69,14 +72,14 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "0 failures, 0 evictions",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "1 container terminated with reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -85,9 +88,70 @@ func TestRemoveFailedPods(t *testing.T) {
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity", ExitCode: 1},
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, expected exitCode 2, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{2}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "2 containers terminated with exitCode 1 and 2 respectively, 2 evictions",
args: createRemoveFailedPodsArgs(false, nil, []int32{1, 2, 3}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
buildTestPod("p2", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "2 containers terminated with exitCode 1 and 2 respectively, expected exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
buildTestPod("p2", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "1 init container terminated with reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -96,9 +160,31 @@ func TestRemoveFailedPods(t *testing.T) {
}, nil), nil),
},
},
{
description: "1 init container terminated with exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "1 init container terminated with exitCode 2, expected 1, 0 eviction",
args: createRemoveFailedPodsArgs(true, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -109,7 +195,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 3000, 10, nil),
@@ -126,7 +212,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -137,7 +223,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -148,7 +234,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -159,7 +245,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -170,7 +256,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, &OneHourInSeconds),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, &OneHourInSeconds),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -181,7 +267,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
@@ -198,7 +284,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -210,7 +296,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
args: createRemoveFailedPodsArgs(true, nil, []string{"ReplicaSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"ReplicaSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -221,7 +307,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, []string{"DaemonSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"DaemonSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
@@ -232,7 +318,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved",
args: createRemoveFailedPodsArgs(true, nil, []string{"DaemonSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"DaemonSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -243,7 +329,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "1 container terminated with reason ShutDown, 0 evictions",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
@@ -253,7 +339,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=Shutdown, 2 containers terminated with reason ShutDown, 2 evictions",
args: createRemoveFailedPodsArgs(false, []string{"Shutdown"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"Shutdown"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
@@ -322,6 +408,7 @@ func TestRemoveFailedPods(t *testing.T) {
plugin, err := New(&RemoveFailedPodsArgs{
Reasons: tc.args.Reasons,
ExitCodes: tc.args.ExitCodes,
MinPodLifetimeSeconds: tc.args.MinPodLifetimeSeconds,
IncludingInitContainers: tc.args.IncludingInitContainers,
ExcludeOwnerKinds: tc.args.ExcludeOwnerKinds,

View File

@@ -30,5 +30,6 @@ type RemoveFailedPodsArgs struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds"`
Reasons []string `json:"reasons"`
ExitCodes []int32 `json:"exitCodes"`
IncludingInitContainers bool `json:"includingInitContainers"`
}

View File

@@ -56,6 +56,11 @@ func (in *RemoveFailedPodsArgs) DeepCopyInto(out *RemoveFailedPodsArgs) {
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.ExitCodes != nil {
in, out := &in.ExitCodes, &out.ExitCodes
*out = make([]int32, len(*in))
copy(*out, *in)
}
return
}