1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

feat: support eviction of failed bare pods

This patch adds the policy(evictFailedBarePods) to allow the failed
pods without ownerReferences to be evicted. For backward compatibility,
disable the policy by default. Address #644.
This commit is contained in:
babygoat
2022-01-03 10:19:48 +08:00
parent 2b23694704
commit 1529180d70
27 changed files with 101 additions and 15 deletions

View File

@@ -132,6 +132,7 @@ The policy includes a common configuration that applies to all the strategies:
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
| `evictFailedBarePods` | `false` | allow eviction of pods without owner references and in failed phase |
As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.
@@ -142,6 +143,7 @@ See each strategy for details on available parameters.
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictFailedBarePods: false
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
@@ -740,8 +742,8 @@ Using Deployments instead of ReplicationControllers provides an automated rollou
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or stand alone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated.
* Pods (static or mirrored pods or standalone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated. (Standalone pods in failed status phase can be evicted by setting `evictFailedBarePods: true`)
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).

View File

@@ -32,6 +32,9 @@ type DeschedulerPolicy struct {
// NodeSelector for a set of nodes to operate over
NodeSelector *string
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool

View File

@@ -32,6 +32,9 @@ type DeschedulerPolicy struct {
// NodeSelector for a set of nodes to operate over
NodeSelector *string `json:"nodeSelector,omitempty"`
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool `json:"evictFailedBarePods,omitempty"`
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -133,6 +133,7 @@ func RegisterConversions(s *runtime.Scheme) error {
func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *DeschedulerPolicy, out *api.DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
@@ -161,6 +162,7 @@ func Convert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Descheduler
func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.DeschedulerPolicy, out *DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -42,6 +42,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -42,6 +42,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -2,7 +2,7 @@
// +build !ignore_autogenerated
/*
Copyright 2021 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -103,6 +103,14 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
}
evictBarePods := false
if deschedulerPolicy.EvictFailedBarePods != nil {
evictBarePods = *deschedulerPolicy.EvictFailedBarePods
if evictBarePods {
klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.")
}
}
evictSystemCriticalPods := false
if deschedulerPolicy.EvictSystemCriticalPods != nil {
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
@@ -140,6 +148,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
evictBarePods,
)
for name, strategy := range deschedulerPolicy.Strategies {

View File

@@ -57,6 +57,7 @@ type PodEvictor struct {
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
ignorePvcPods bool
@@ -72,6 +73,7 @@ func NewPodEvictor(
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
evictFailedBarePods bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
var namespacePodCount = make(namespacePodEvictCount)
@@ -91,6 +93,7 @@ func NewPodEvictor(
namespacePodCount: namespacePodCount,
evictLocalStoragePods: evictLocalStoragePods,
evictSystemCriticalPods: evictSystemCriticalPods,
evictFailedBarePods: evictFailedBarePods,
ignorePvcPods: ignorePvcPods,
}
}
@@ -228,6 +231,25 @@ func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
}
ev := &evictable{}
if pe.evictFailedBarePods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
ownerRefList := podutil.OwnerRef(pod)
// Enable evictFailedBarePods to evict bare pods in failed phase
if len(ownerRefList) == 0 && pod.Status.Phase != v1.PodFailed {
return fmt.Errorf("pod does not have any ownerRefs and is not in failed phase")
}
return nil
})
} else {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
ownerRefList := podutil.OwnerRef(pod)
// Moved from IsEvictable function for backward compatibility
if len(ownerRefList) == 0 {
return fmt.Errorf("pod does not have any ownerRefs")
}
return nil
})
}
if !pe.evictSystemCriticalPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
// Moved from IsEvictable function to allow for disabling
@@ -291,10 +313,6 @@ func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}
if len(ownerRefList) == 0 {
checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
}
if utils.IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}

View File

@@ -83,6 +83,7 @@ func TestIsEvictable(t *testing.T) {
pod *v1.Pod
nodes []*v1.Node
runBefore func(*v1.Pod, []*v1.Node)
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
priorityThreshold *int32
@@ -91,7 +92,27 @@ func TestIsEvictable(t *testing.T) {
}
testCases := []testCase{
{ // Normal pod eviction with normal ownerRefs
{ // Failed pod eviction with no ownerRefs.
pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
},
evictFailedBarePods: false,
result: false,
}, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled
pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
},
evictFailedBarePods: true,
result: false,
}, { // Failed pod eviction with no ownerRefs
pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
},
evictFailedBarePods: true,
result: true,
}, { // Normal pod eviction with normal ownerRefs
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -417,6 +438,7 @@ func TestIsEvictable(t *testing.T) {
podEvictor := &PodEvictor{
evictLocalStoragePods: test.evictLocalStoragePods,
evictSystemCriticalPods: test.evictSystemCriticalPods,
evictFailedBarePods: test.evictFailedBarePods,
nodes: nodes,
}

View File

@@ -300,6 +300,7 @@ func TestFindDuplicatePods(t *testing.T) {
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
@@ -725,6 +726,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -264,6 +264,7 @@ func TestRemoveFailedPods(t *testing.T) {
false,
false,
false,
false,
)
RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -225,6 +225,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -262,6 +262,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.evictLocalStoragePods,
tc.evictSystemCriticalPods,
false,
false,
)
strategy := api.DeschedulerStrategy{

View File

@@ -466,6 +466,7 @@ func TestHighNodeUtilization(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
@@ -669,6 +670,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
false,
false,
false,
false,
)
HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -723,6 +723,7 @@ func TestLowNodeUtilization(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
@@ -1034,6 +1035,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
false,
false,
false,
false,
)
LowNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -212,6 +212,7 @@ func TestPodAntiAffinity(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Params: &api.StrategyParameters{

View File

@@ -301,6 +301,7 @@ func TestPodLifeTime(t *testing.T) {
false,
false,
tc.ignorePvcPods,
false,
)
PodLifeTime(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -237,6 +237,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -905,6 +905,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsViolatingTopologySpreadConstraint(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()

View File

@@ -150,6 +150,7 @@ func TestRemoveDuplicates(t *testing.T) {
true,
false,
false,
false,
)
t.Log("Running DeschedulerStrategy strategy")

View File

@@ -179,6 +179,7 @@ func runPodLifetimeStrategy(
false,
evictCritical,
false,
false,
),
getPodsAssignedToNode,
)
@@ -1316,5 +1317,6 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*
true,
false,
false,
false,
)
}

View File

@@ -140,6 +140,7 @@ func TestTooManyRestarts(t *testing.T) {
true,
false,
false,
false,
)
// Run RemovePodsHavingTooManyRestarts strategy
t.Log("Running RemovePodsHavingTooManyRestarts strategy")