mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 05:14:13 +01:00
add ignorePodsWithoutPDB option (#1529)
* add ignoreNonPDBPods option * take2 * add test * poddisruptionbudgets are now used by defaultevictor plugin * add poddisruptionbudgets to rbac * review comments * don't use GetPodPodDisruptionBudgets * review comment, don't hide error
This commit is contained in:
23
README.md
23
README.md
@@ -129,18 +129,19 @@ These are top level keys in the Descheduler Policy that you can use to configure
|
||||
|
||||
The Default Evictor Plugin is used by default for filtering pods before processing them in an strategy plugin, or for applying a PreEvictionFilter of pods before eviction. You can also create your own Evictor Plugin or use the Default one provided by Descheduler. Other uses for the Evictor plugin can be to sort, filter, validate or group pods by different criteria, and that's why this is handled by a plugin and not configured in the top level config.
|
||||
|
||||
| Name |type| Default Value | Description |
|
||||
|------|----|---------------|-------------|
|
||||
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
|
||||
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
|
||||
| Name |type| Default Value | Description |
|
||||
|---------------------------|----|---------------|-----------------------------------------------------------------------------------------------------------------------------|
|
||||
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
|
||||
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
|
||||
| `evictSystemCriticalPods` |`bool`| `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
|
||||
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
|
||||
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
|
||||
|`labelSelector`|`metav1.LabelSelector`||(see [label filtering](#label-filtering))|
|
||||
|`priorityThreshold`|`priorityThreshold`||(see [priority filtering](#priority-filtering))|
|
||||
|`nodeFit`|`bool`|`false`|(see [node fit filtering](#node-fit-filtering))|
|
||||
|`minReplicas`|`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
|
||||
|`minPodAge`|`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
|
||||
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
|
||||
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
|
||||
| `labelSelector` |`metav1.LabelSelector`|| (see [label filtering](#label-filtering)) |
|
||||
| `priorityThreshold` |`priorityThreshold`|| (see [priority filtering](#priority-filtering)) |
|
||||
| `nodeFit` |`bool`|`false`| (see [node fit filtering](#node-fit-filtering)) |
|
||||
| `minReplicas` |`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
|
||||
| `minPodAge` |`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
|
||||
| `ignorePodsWithoutPDB` |`bool`|`false`| set whether pods without PodDisruptionBudget should be evicted or ignored |
|
||||
|
||||
### Example policy
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ rules:
|
||||
- apiGroups: ["scheduling.k8s.io"]
|
||||
resources: ["priorityclasses"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["policy"]
|
||||
resources: ["poddisruptionbudgets"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
verbs: ["create", "update"]
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
schedulingv1 "k8s.io/api/scheduling/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
|
||||
@@ -132,8 +133,11 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.Desche
|
||||
v1.SchemeGroupVersion.WithResource("nodes"),
|
||||
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
|
||||
// consistent with the real runs without having to keep the list here in sync.
|
||||
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
|
||||
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses")) // Used by the defaultevictor plugin
|
||||
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
|
||||
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin
|
||||
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin
|
||||
|
||||
) // Used by the defaultevictor plugin
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
|
||||
@@ -105,6 +105,7 @@ func setDefaultEvictor(profile api.DeschedulerProfile, client clientset.Interfac
|
||||
EvictSystemCriticalPods: false,
|
||||
IgnorePvcPods: false,
|
||||
EvictFailedBarePods: false,
|
||||
IgnorePodsWithoutPDB: false,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -195,6 +195,20 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
if defaultEvictorArgs.IgnorePodsWithoutPDB {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
hasPdb, err := utils.IsPodCoveredByPDB(pod, handle.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister())
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to check if pod is covered by PodDisruptionBudget: %w", err)
|
||||
}
|
||||
if !hasPdb {
|
||||
return fmt.Errorf("no PodDisruptionBudget found for pod")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
return ev, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/util/uuid"
|
||||
@@ -39,6 +40,7 @@ type testCase struct {
|
||||
description string
|
||||
pods []*v1.Pod
|
||||
nodes []*v1.Node
|
||||
pdbs []*policyv1.PodDisruptionBudget
|
||||
evictFailedBarePods bool
|
||||
evictLocalStoragePods bool
|
||||
evictSystemCriticalPods bool
|
||||
@@ -47,6 +49,7 @@ type testCase struct {
|
||||
minReplicas uint
|
||||
minPodAge *metav1.Duration
|
||||
result bool
|
||||
ignorePodsWithoutPDB bool
|
||||
}
|
||||
|
||||
func TestDefaultEvictorPreEvictionFilter(t *testing.T) {
|
||||
@@ -739,6 +742,33 @@ func TestDefaultEvictorFilter(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
result: true,
|
||||
}, {
|
||||
description: "ignorePodsWithoutPDB, pod with no PDBs, no eviction",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Labels = map[string]string{
|
||||
"app": "foo",
|
||||
}
|
||||
}),
|
||||
},
|
||||
ignorePodsWithoutPDB: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "ignorePodsWithoutPDB, pod with PDBs, evicts",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Labels = map[string]string{
|
||||
"app": "foo",
|
||||
}
|
||||
}),
|
||||
},
|
||||
pdbs: []*policyv1.PodDisruptionBudget{
|
||||
test.BuildTestPDB("pdb1", "foo"),
|
||||
},
|
||||
ignorePodsWithoutPDB: true,
|
||||
result: true,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -811,11 +841,15 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
|
||||
for _, pod := range test.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
for _, pdb := range test.pdbs {
|
||||
objs = append(objs, pdb)
|
||||
}
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
|
||||
_ = sharedInformerFactory.Policy().V1().PodDisruptionBudgets().Lister()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
@@ -833,9 +867,10 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
|
||||
PriorityThreshold: &api.PriorityThreshold{
|
||||
Value: test.priorityThreshold,
|
||||
},
|
||||
NodeFit: test.nodeFit,
|
||||
MinReplicas: test.minReplicas,
|
||||
MinPodAge: test.minPodAge,
|
||||
NodeFit: test.nodeFit,
|
||||
MinReplicas: test.minReplicas,
|
||||
MinPodAge: test.minPodAge,
|
||||
IgnorePodsWithoutPDB: test.ignorePodsWithoutPDB,
|
||||
}
|
||||
|
||||
evictorPlugin, err := New(
|
||||
|
||||
@@ -42,6 +42,7 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
|
||||
LabelSelector: nil,
|
||||
PriorityThreshold: nil,
|
||||
NodeFit: false,
|
||||
IgnorePodsWithoutPDB: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -57,7 +58,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
|
||||
PriorityThreshold: &api.PriorityThreshold{
|
||||
Value: utilptr.To[int32](800),
|
||||
},
|
||||
NodeFit: true,
|
||||
NodeFit: true,
|
||||
IgnorePodsWithoutPDB: true,
|
||||
},
|
||||
want: &DefaultEvictorArgs{
|
||||
NodeSelector: "NodeSelector",
|
||||
@@ -70,7 +72,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
|
||||
PriorityThreshold: &api.PriorityThreshold{
|
||||
Value: utilptr.To[int32](800),
|
||||
},
|
||||
NodeFit: true,
|
||||
NodeFit: true,
|
||||
IgnorePodsWithoutPDB: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,4 +36,5 @@ type DefaultEvictorArgs struct {
|
||||
NodeFit bool `json:"nodeFit,omitempty"`
|
||||
MinReplicas uint `json:"minReplicas,omitempty"`
|
||||
MinPodAge *metav1.Duration `json:"minPodAge,omitempty"`
|
||||
IgnorePodsWithoutPDB bool `json:"ignorePodsWithoutPDB,omitempty"`
|
||||
}
|
||||
|
||||
@@ -3,6 +3,11 @@ package utils
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
policy "k8s.io/api/policy/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
|
||||
policyv1 "k8s.io/client-go/listers/policy/v1"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@@ -115,6 +120,38 @@ func IsPodWithPVC(pod *v1.Pod) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsPodCoveredByPDB returns true if the pod is covered by at least one PodDisruptionBudget.
|
||||
func IsPodCoveredByPDB(pod *v1.Pod, lister policyv1.PodDisruptionBudgetLister) (bool, error) {
|
||||
// We can't use the GetPodPodDisruptionBudgets expansion method here because it treats no pdb as an error,
|
||||
// but we want to return false.
|
||||
|
||||
list, err := lister.PodDisruptionBudgets(pod.Namespace).List(labels.Everything())
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
podLabels := labels.Set(pod.Labels)
|
||||
var pdbList []*policy.PodDisruptionBudget
|
||||
for _, pdb := range list {
|
||||
selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector)
|
||||
if err != nil {
|
||||
// This object has an invalid selector, it will never match the pod
|
||||
continue
|
||||
}
|
||||
|
||||
if !selector.Matches(podLabels) {
|
||||
continue
|
||||
}
|
||||
pdbList = append(pdbList, pdb)
|
||||
}
|
||||
|
||||
return len(pdbList) > 0, nil
|
||||
}
|
||||
|
||||
// GetPodSource returns the source of the pod based on the annotation.
|
||||
func GetPodSource(pod *v1.Pod) (string, error) {
|
||||
if pod.Annotations != nil {
|
||||
|
||||
@@ -23,6 +23,9 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
"k8s.io/apimachinery/pkg/util/intstr"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
@@ -67,6 +70,25 @@ func BuildTestPod(name string, cpu, memory int64, nodeName string, apply func(*v
|
||||
return pod
|
||||
}
|
||||
|
||||
func BuildTestPDB(name, appLabel string) *policyv1.PodDisruptionBudget {
|
||||
maxUnavailable := intstr.FromInt32(1)
|
||||
pdb := &policyv1.PodDisruptionBudget{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Namespace: "default",
|
||||
Name: name,
|
||||
},
|
||||
Spec: policyv1.PodDisruptionBudgetSpec{
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{
|
||||
"app": appLabel,
|
||||
},
|
||||
},
|
||||
MaxUnavailable: &maxUnavailable,
|
||||
},
|
||||
}
|
||||
return pdb
|
||||
}
|
||||
|
||||
// GetMirrorPodAnnotation returns the annotation needed for mirror pod.
|
||||
func GetMirrorPodAnnotation() map[string]string {
|
||||
return map[string]string{
|
||||
|
||||
Reference in New Issue
Block a user