1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-25 20:59:28 +01:00

add ignorePodsWithoutPDB option (#1529)

* add ignoreNonPDBPods option

* take2

* add test

* poddisruptionbudgets are now used by defaultevictor plugin

* add poddisruptionbudgets to rbac

* review comments

* don't use GetPodPodDisruptionBudgets

* review comment, don't hide error
This commit is contained in:
Simon Scharf
2024-10-15 22:21:04 +02:00
committed by GitHub
parent 7696f00518
commit ef0c2c1c47
10 changed files with 139 additions and 18 deletions

View File

@@ -129,18 +129,19 @@ These are top level keys in the Descheduler Policy that you can use to configure
The Default Evictor Plugin is used by default for filtering pods before processing them in an strategy plugin, or for applying a PreEvictionFilter of pods before eviction. You can also create your own Evictor Plugin or use the Default one provided by Descheduler. Other uses for the Evictor plugin can be to sort, filter, validate or group pods by different criteria, and that's why this is handled by a plugin and not configured in the top level config.
| Name |type| Default Value | Description |
|------|----|---------------|-------------|
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
| Name |type| Default Value | Description |
|---------------------------|----|---------------|-----------------------------------------------------------------------------------------------------------------------------|
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` |`bool`| `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
|`labelSelector`|`metav1.LabelSelector`||(see [label filtering](#label-filtering))|
|`priorityThreshold`|`priorityThreshold`||(see [priority filtering](#priority-filtering))|
|`nodeFit`|`bool`|`false`|(see [node fit filtering](#node-fit-filtering))|
|`minReplicas`|`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
|`minPodAge`|`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
| `labelSelector` |`metav1.LabelSelector`|| (see [label filtering](#label-filtering)) |
| `priorityThreshold` |`priorityThreshold`|| (see [priority filtering](#priority-filtering)) |
| `nodeFit` |`bool`|`false`| (see [node fit filtering](#node-fit-filtering)) |
| `minReplicas` |`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
| `minPodAge` |`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
| `ignorePodsWithoutPDB` |`bool`|`false`| set whether pods without PodDisruptionBudget should be evicted or ignored |
### Example policy

View File

@@ -22,6 +22,9 @@ rules:
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["get", "watch", "list"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "update"]

View File

@@ -23,6 +23,7 @@ import (
"strconv"
"time"
policyv1 "k8s.io/api/policy/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -132,8 +133,11 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.Desche
v1.SchemeGroupVersion.WithResource("nodes"),
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
// consistent with the real runs without having to keep the list here in sync.
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses")) // Used by the defaultevictor plugin
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin
) // Used by the defaultevictor plugin
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {

View File

@@ -105,6 +105,7 @@ func setDefaultEvictor(profile api.DeschedulerProfile, client clientset.Interfac
EvictSystemCriticalPods: false,
IgnorePvcPods: false,
EvictFailedBarePods: false,
IgnorePodsWithoutPDB: false,
},
}

View File

@@ -195,6 +195,20 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
return nil
})
}
if defaultEvictorArgs.IgnorePodsWithoutPDB {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
hasPdb, err := utils.IsPodCoveredByPDB(pod, handle.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister())
if err != nil {
return fmt.Errorf("unable to check if pod is covered by PodDisruptionBudget: %w", err)
}
if !hasPdb {
return fmt.Errorf("no PodDisruptionBudget found for pod")
}
return nil
})
}
return ev, nil
}

View File

@@ -22,6 +22,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
@@ -39,6 +40,7 @@ type testCase struct {
description string
pods []*v1.Pod
nodes []*v1.Node
pdbs []*policyv1.PodDisruptionBudget
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
@@ -47,6 +49,7 @@ type testCase struct {
minReplicas uint
minPodAge *metav1.Duration
result bool
ignorePodsWithoutPDB bool
}
func TestDefaultEvictorPreEvictionFilter(t *testing.T) {
@@ -739,6 +742,33 @@ func TestDefaultEvictorFilter(t *testing.T) {
}),
},
result: true,
}, {
description: "ignorePodsWithoutPDB, pod with no PDBs, no eviction",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Labels = map[string]string{
"app": "foo",
}
}),
},
ignorePodsWithoutPDB: true,
result: false,
}, {
description: "ignorePodsWithoutPDB, pod with PDBs, evicts",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Labels = map[string]string{
"app": "foo",
}
}),
},
pdbs: []*policyv1.PodDisruptionBudget{
test.BuildTestPDB("pdb1", "foo"),
},
ignorePodsWithoutPDB: true,
result: true,
},
}
@@ -811,11 +841,15 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
for _, pod := range test.pods {
objs = append(objs, pod)
}
for _, pdb := range test.pdbs {
objs = append(objs, pdb)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
_ = sharedInformerFactory.Policy().V1().PodDisruptionBudgets().Lister()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
@@ -833,9 +867,10 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
PriorityThreshold: &api.PriorityThreshold{
Value: test.priorityThreshold,
},
NodeFit: test.nodeFit,
MinReplicas: test.minReplicas,
MinPodAge: test.minPodAge,
NodeFit: test.nodeFit,
MinReplicas: test.minReplicas,
MinPodAge: test.minPodAge,
IgnorePodsWithoutPDB: test.ignorePodsWithoutPDB,
}
evictorPlugin, err := New(

View File

@@ -42,6 +42,7 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
LabelSelector: nil,
PriorityThreshold: nil,
NodeFit: false,
IgnorePodsWithoutPDB: false,
},
},
{
@@ -57,7 +58,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
PriorityThreshold: &api.PriorityThreshold{
Value: utilptr.To[int32](800),
},
NodeFit: true,
NodeFit: true,
IgnorePodsWithoutPDB: true,
},
want: &DefaultEvictorArgs{
NodeSelector: "NodeSelector",
@@ -70,7 +72,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
PriorityThreshold: &api.PriorityThreshold{
Value: utilptr.To[int32](800),
},
NodeFit: true,
NodeFit: true,
IgnorePodsWithoutPDB: true,
},
},
}

View File

@@ -36,4 +36,5 @@ type DefaultEvictorArgs struct {
NodeFit bool `json:"nodeFit,omitempty"`
MinReplicas uint `json:"minReplicas,omitempty"`
MinPodAge *metav1.Duration `json:"minPodAge,omitempty"`
IgnorePodsWithoutPDB bool `json:"ignorePodsWithoutPDB,omitempty"`
}

View File

@@ -3,6 +3,11 @@ package utils
import (
"fmt"
policy "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/labels"
policyv1 "k8s.io/client-go/listers/policy/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -115,6 +120,38 @@ func IsPodWithPVC(pod *v1.Pod) bool {
return false
}
// IsPodCoveredByPDB returns true if the pod is covered by at least one PodDisruptionBudget.
func IsPodCoveredByPDB(pod *v1.Pod, lister policyv1.PodDisruptionBudgetLister) (bool, error) {
// We can't use the GetPodPodDisruptionBudgets expansion method here because it treats no pdb as an error,
// but we want to return false.
list, err := lister.PodDisruptionBudgets(pod.Namespace).List(labels.Everything())
if err != nil {
return false, err
}
if len(list) == 0 {
return false, nil
}
podLabels := labels.Set(pod.Labels)
var pdbList []*policy.PodDisruptionBudget
for _, pdb := range list {
selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector)
if err != nil {
// This object has an invalid selector, it will never match the pod
continue
}
if !selector.Matches(podLabels) {
continue
}
pdbList = append(pdbList, pdb)
}
return len(pdbList) > 0, nil
}
// GetPodSource returns the source of the pod based on the annotation.
func GetPodSource(pod *v1.Pod) (string, error) {
if pod.Annotations != nil {

View File

@@ -23,6 +23,9 @@ import (
"testing"
"time"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/util/intstr"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
@@ -67,6 +70,25 @@ func BuildTestPod(name string, cpu, memory int64, nodeName string, apply func(*v
return pod
}
func BuildTestPDB(name, appLabel string) *policyv1.PodDisruptionBudget {
maxUnavailable := intstr.FromInt32(1)
pdb := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Name: name,
},
Spec: policyv1.PodDisruptionBudgetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": appLabel,
},
},
MaxUnavailable: &maxUnavailable,
},
}
return pdb
}
// GetMirrorPodAnnotation returns the annotation needed for mirror pod.
func GetMirrorPodAnnotation() map[string]string {
return map[string]string{