1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

DefaultEvictor: introduce no-eviction policy

NoEvictionPolicy dictates whether a no-eviction policy is prefered or mandatory.
Needs to be used with caution as this will give users ability to protect their pods
from eviction. Which might work against enfored policies. E.g. plugins evicting pods
violating security policies.
This commit is contained in:
Jan Chaloupka
2025-07-23 10:27:05 +02:00
parent b84b2623b9
commit 7380aa6e0a
9 changed files with 219 additions and 6 deletions

View File

@@ -162,6 +162,7 @@ The Default Evictor Plugin is used by default for filtering pods before processi
| `minReplicas` |`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
| `minPodAge` |`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
| `ignorePodsWithoutPDB` |`bool`|`false`| set whether pods without PodDisruptionBudget should be evicted or ignored |
| `noEvictionPolicy` |`enum`|``| sets whether a `descheduler.alpha.kubernetes.io/prefer-no-eviction` pod annotation is considered preferred or mandatory. Accepted values: "", "Preferred", "Mandatory". Defaults to "Preferred". |
### Example policy
@@ -1013,12 +1014,16 @@ never evicted because these pods won't be recreated. (Standalone pods in failed
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
* All types of pods with the `descheduler.alpha.kubernetes.io/evict` annotation are eligible for eviction. This
annotation is used to override checks which prevent eviction and users can select which pod is evicted.
Users should know how and if the pod will be recreated.
The annotation only affects internal descheduler checks.
The anti-disruption protection provided by the [/eviction](https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/)
subresource is still respected.
* Pods with the `descheduler.alpha.kubernetes.io/prefer-no-eviction` annotation voice their preference not to be evicted.
Each plugin decides whether the annotation gets respected or not. When the `DefaultEvictor` plugin sets `noEvictionPolicy`
to `Mandatory` all such pods are excluded from eviction. Needs to be used with caution as some plugins may enfore
various policies that are expected to be always met.
* Pods with a non-nil DeletionTimestamp are not evicted by default.
Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.

View File

@@ -17,12 +17,17 @@ limitations under the License.
package utils
import (
corev1 "k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
)
const (
EvictionKind = "Eviction"
EvictionSubresource = "pods/eviction"
// A new experimental feature for soft no-eviction preference.
// Each plugin will decide whether the soft preference will be respected.
// If configured the soft preference turns into a mandatory no-eviction policy for the DefaultEvictor plugin.
SoftNoEvictionAnnotationKey = "descheduler.alpha.kubernetes.io/prefer-no-eviction"
)
// SupportEviction uses Discovery API to find out if the server support eviction subresource
@@ -56,3 +61,9 @@ func SupportEviction(client clientset.Interface) (string, error) {
}
return "", nil
}
// HaveEvictAnnotation checks if the pod have evict annotation
func HaveNoEvictionAnnotation(pod *corev1.Pod) bool {
_, found := pod.ObjectMeta.Annotations[SoftNoEvictionAnnotationKey]
return found
}

View File

@@ -25,6 +25,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/cache"
evictionutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
"sigs.k8s.io/descheduler/pkg/utils"
)
@@ -254,14 +255,32 @@ func SortPodsBasedOnPriorityLowToHigh(pods []*v1.Pod) {
return false
}
if (pods[j].Spec.Priority == nil && pods[i].Spec.Priority == nil) || (*pods[i].Spec.Priority == *pods[j].Spec.Priority) {
if IsBestEffortPod(pods[i]) {
iIsBestEffortPod := IsBestEffortPod(pods[i])
jIsBestEffortPod := IsBestEffortPod(pods[j])
iIsBurstablePod := IsBurstablePod(pods[i])
jIsBurstablePod := IsBurstablePod(pods[j])
iIsGuaranteedPod := IsGuaranteedPod(pods[i])
jIsGuaranteedPod := IsGuaranteedPod(pods[j])
if (iIsBestEffortPod && jIsBestEffortPod) || (iIsBurstablePod && jIsBurstablePod) || (iIsGuaranteedPod && jIsGuaranteedPod) {
iHasNoEvictonPolicy := evictionutils.HaveNoEvictionAnnotation(pods[i])
jHasNoEvictonPolicy := evictionutils.HaveNoEvictionAnnotation(pods[j])
if !iHasNoEvictonPolicy {
return true
}
if !jHasNoEvictonPolicy {
return false
}
return true
}
if IsBurstablePod(pods[i]) && IsGuaranteedPod(pods[j]) {
if iIsBestEffortPod {
return true
}
if iIsBurstablePod && jIsGuaranteedPod {
return true
}
return false
}
return *pods[i].Spec.Priority < *pods[j].Spec.Priority
})
}

View File

@@ -157,8 +157,66 @@ func TestSortPodsBasedOnPriorityLowToHigh(t *testing.T) {
p6 := test.BuildTestPod("p6", 400, 100, n1.Name, test.MakeGuaranteedPod)
p6.Spec.Priority = nil
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
expectedPodList := []*v1.Pod{p5, p6, p1, p2, p3, p4}
p7 := test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, lowPriority)
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/prefer-no-eviction": "",
}
})
// BestEffort
p8 := test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeBestEffortPod(pod)
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/prefer-no-eviction": "",
}
})
// Burstable
p9 := test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeBurstablePod(pod)
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/prefer-no-eviction": "",
}
})
// Guaranteed
p10 := test.BuildTestPod("p10", 400, 100, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeGuaranteedPod(pod)
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/prefer-no-eviction": "",
}
})
// Burstable
p11 := test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
test.MakeBurstablePod(pod)
})
// Burstable
p12 := test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
test.MakeBurstablePod(pod)
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/prefer-no-eviction": "",
}
})
podList := []*v1.Pod{p1, p8, p9, p10, p2, p3, p4, p5, p6, p7, p11, p12}
// p5: no priority, best effort
// p11: no priority, burstable
// p6: no priority, guaranteed
// p1: low priority
// p7: low priority, prefer-no-eviction
// p2: high priority, best effort
// p8: high priority, best effort, prefer-no-eviction
// p3: high priority, burstable
// p9: high priority, burstable, prefer-no-eviction
// p4: high priority, guaranteed
// p10: high priority, guaranteed, prefer-no-eviction
expectedPodList := []*v1.Pod{p5, p11, p12, p6, p1, p7, p2, p8, p3, p9, p4, p10}
SortPodsBasedOnPriorityLowToHigh(podList)
if !reflect.DeepEqual(getPodListNames(podList), getPodListNames(expectedPodList)) {

View File

@@ -24,6 +24,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
evictionutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
@@ -140,6 +141,10 @@ func (d *DefaultEvictor) Filter(pod *v1.Pod) bool {
return true
}
if d.args.NoEvictionPolicy == MandatoryNoEvictionPolicy && evictionutils.HaveNoEvictionAnnotation(pod) {
return false
}
if utils.IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}

View File

@@ -29,6 +29,7 @@ import (
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
"sigs.k8s.io/descheduler/pkg/api"
evictionutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
frameworkfake "sigs.k8s.io/descheduler/pkg/framework/fake"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
@@ -51,6 +52,7 @@ type testCase struct {
minPodAge *metav1.Duration
result bool
ignorePodsWithoutPDB bool
noEvictionPolicy NoEvictionPolicy
}
func TestDefaultEvictorPreEvictionFilter(t *testing.T) {
@@ -358,6 +360,29 @@ func TestDefaultEvictorFilter(t *testing.T) {
}),
},
result: true,
}, {
description: "Normal pod eviction with normal ownerRefs and " + evictionutils.SoftNoEvictionAnnotationKey + " annotation (preference)",
pods: []*v1.Pod{
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{evictionutils.SoftNoEvictionAnnotationKey: ""}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, {
description: "Normal pod eviction with normal ownerRefs and " + evictionutils.SoftNoEvictionAnnotationKey + " annotation (mandatory)",
pods: []*v1.Pod{
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{evictionutils.SoftNoEvictionAnnotationKey: ""}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
noEvictionPolicy: MandatoryNoEvictionPolicy,
result: false,
}, {
description: "Normal pod eviction with replicaSet ownerRefs",
pods: []*v1.Pod{
@@ -831,6 +856,7 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
MinReplicas: test.minReplicas,
MinPodAge: test.minPodAge,
IgnorePodsWithoutPDB: test.ignorePodsWithoutPDB,
NoEvictionPolicy: test.noEvictionPolicy,
}
evictorPlugin, err := New(

View File

@@ -37,4 +37,23 @@ type DefaultEvictorArgs struct {
MinReplicas uint `json:"minReplicas,omitempty"`
MinPodAge *metav1.Duration `json:"minPodAge,omitempty"`
IgnorePodsWithoutPDB bool `json:"ignorePodsWithoutPDB,omitempty"`
NoEvictionPolicy NoEvictionPolicy `json:"noEvictionPolicy,omitempty"`
}
// NoEvictionPolicy dictates whether a no-eviction policy is preferred or mandatory.
// Needs to be used with caution as this will give users ability to protect their pods
// from eviction. Which might work against enfored policies. E.g. plugins evicting pods
// violating security policies.
type NoEvictionPolicy string
const (
// PreferredNoEvictionPolicy interprets the no-eviction policy as a preference.
// Meaning the annotation will get ignored by the DefaultEvictor plugin.
// Yet, plugins may optionally sort their pods based on the annotation
// and focus on evicting pods that do not set the annotation.
PreferredNoEvictionPolicy NoEvictionPolicy = "Preferred"
// MandatoryNoEvictionPolicy interprets the no-eviction policy as mandatory.
// Every pod carying the annotation will get excluded from eviction.
MandatoryNoEvictionPolicy NoEvictionPolicy = "Mandatory"
)

View File

@@ -25,12 +25,18 @@ func ValidateDefaultEvictorArgs(obj runtime.Object) error {
args := obj.(*DefaultEvictorArgs)
if args.PriorityThreshold != nil && args.PriorityThreshold.Value != nil && len(args.PriorityThreshold.Name) > 0 {
return fmt.Errorf("priority threshold misconfigured, only one of priorityThreshold fields can be set, got %v", args)
return fmt.Errorf("priority threshold misconfigured, only one of priorityThreshold fields can be set")
}
if args.MinReplicas == 1 {
klog.V(4).Info("DefaultEvictor minReplicas must be greater than 1 to check for min pods during eviction. This check will be ignored during eviction.")
}
if args.NoEvictionPolicy != "" {
if args.NoEvictionPolicy != PreferredNoEvictionPolicy && args.NoEvictionPolicy != MandatoryNoEvictionPolicy {
return fmt.Errorf("noEvictionPolicy accepts only %q values", []NoEvictionPolicy{PreferredNoEvictionPolicy, MandatoryNoEvictionPolicy})
}
}
return nil
}

View File

@@ -0,0 +1,64 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaultevictor
import (
"fmt"
"testing"
"k8s.io/apimachinery/pkg/runtime"
utilptr "k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/api"
)
func TestValidateDefaultEvictorArgs(t *testing.T) {
tests := []struct {
name string
args *DefaultEvictorArgs
errInfo error
}{
{
name: "passing invalid priority",
args: &DefaultEvictorArgs{
PriorityThreshold: &api.PriorityThreshold{
Value: utilptr.To[int32](1),
Name: "priority-name",
},
},
errInfo: fmt.Errorf("priority threshold misconfigured, only one of priorityThreshold fields can be set"),
}, {
name: "passing invalid no eviction policy",
args: &DefaultEvictorArgs{
NoEvictionPolicy: "invalid-no-eviction-policy",
},
errInfo: fmt.Errorf("noEvictionPolicy accepts only %q values", []NoEvictionPolicy{PreferredNoEvictionPolicy, MandatoryNoEvictionPolicy}),
},
}
for _, testCase := range tests {
t.Run(testCase.name, func(t *testing.T) {
validateErr := ValidateDefaultEvictorArgs(runtime.Object(testCase.args))
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of plugin config: %q but got %q instead", testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of plugin config: %q but got %q instead", testCase.errInfo, validateErr)
}
})
}
}