From d9d6ca64e9dc9ab0d95dc1f22b0e6dee1aab3ada Mon Sep 17 00:00:00 2001 From: Ricardo Maraschini Date: Wed, 8 Oct 2025 09:43:05 +0200 Subject: [PATCH] feat: enable pod protection based on storage classes this commit introduces a new customization on the existing PodsWithPVC protection. this new customization allow users to make pods that refer to a given storage class unevictable. for example, to protect pods referring to `storage-class-0` and `storage-class-1` this configuration can be used: ```yaml apiVersion: "descheduler/v1alpha2" kind: "DeschedulerPolicy" profiles: - name: ProfileName pluginConfig: - name: "DefaultEvictor" args: podProtections: extraEnabled: - PodsWithPVC config: PodsWithPVC: protectedStorageClasses: - name: storage-class-0 - name: storage-class-1 ``` changes introduced by this pr: 1. the descheduler starts to observe persistent volume claims. 1. a new api field was introduced to allow per pod protection config. 1. rbac had to be adjusted (+persistentvolumeclaims). --- README.md | 26 ++ kubernetes/base/rbac.yaml | 3 + pkg/descheduler/descheduler.go | 2 +- .../plugins/defaultevictor/defaultevictor.go | 108 ++++- .../defaultevictor/defaultevictor_test.go | 200 ++++++++++ pkg/framework/plugins/defaultevictor/types.go | 31 ++ .../plugins/defaultevictor/validation.go | 11 + .../plugins/defaultevictor/validation_test.go | 27 ++ .../defaultevictor/zz_generated.deepcopy.go | 47 +++ test/e2e/e2e_podswithpvc_test.go | 373 ++++++++++++++++++ test/test_utils.go | 19 + 11 files changed, 837 insertions(+), 10 deletions(-) create mode 100644 test/e2e/e2e_podswithpvc_test.go diff --git a/README.md b/README.md index b4233a646..0b11860d0 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,31 @@ The Default Evictor Plugin is used by default for filtering pods before processi | `"PodsWithoutPDB"` | Prevents eviction of Pods without a PodDisruptionBudget (PDB). | | `"PodsWithResourceClaims"` | Prevents eviction of Pods using ResourceClaims. | + +#### Protecting pods using specific Storage Classes + +With the `PodsWithPVC` protection enabled all pods using PVCs are protected from eviction by default, if needed you can restrict the protection by filtering by PVC storage class. When filtering out by storage class, only pods using PVCs with the specified storage classes are protected from eviction. For example: + +```yaml +apiVersion: "descheduler/v1alpha2" +kind: "DeschedulerPolicy" +profiles: +- name: ProfileName + pluginConfig: + - name: "DefaultEvictor" + args: + podProtections: + extraEnabled: + - PodsWithPVC + config: + PodsWithPVC: + protectedStorageClasses: + - name: storage-class-0 + - name: storage-class-1 + +``` +This example will protect pods using PVCs with storage classes `storage-class-0` and `storage-class-1` from eviction. + ### Example policy As part of the policy, you will start deciding which top level configuration to use, then which Evictor plugin to use (if you have your own, the Default Evictor if not), followed by deciding the configuration passed to the Evictor Plugin. By default, the Default Evictor is enabled for both `filter` and `preEvictionFilter` extension points. After that you will enable/disable eviction strategies plugins and configure them properly. @@ -228,6 +253,7 @@ profiles: #- "PodsWithPVC" #- "PodsWithoutPDB" #- "PodsWithResourceClaims" + config: {} nodeFit: true minReplicas: 2 plugins: diff --git a/kubernetes/base/rbac.yaml b/kubernetes/base/rbac.yaml index cd5bdab8a..a87e99415 100644 --- a/kubernetes/base/rbac.yaml +++ b/kubernetes/base/rbac.yaml @@ -35,6 +35,9 @@ rules: - apiGroups: ["metrics.k8s.io"] resources: ["nodes", "pods"] verbs: ["get", "list"] +- apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "watch", "list"] --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index d9bb313d0..e6f5940d9 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -164,7 +164,7 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin - + v1.SchemeGroupVersion.WithResource("persistentvolumeclaims"), // Used by the defaultevictor plugin ) // Used by the defaultevictor plugin getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) diff --git a/pkg/framework/plugins/defaultevictor/defaultevictor.go b/pkg/framework/plugins/defaultevictor/defaultevictor.go index 909473f2f..f5f91863b 100644 --- a/pkg/framework/plugins/defaultevictor/defaultevictor.go +++ b/pkg/framework/plugins/defaultevictor/defaultevictor.go @@ -17,11 +17,13 @@ import ( "context" "errors" "fmt" + "maps" "slices" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/informers" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -122,13 +124,67 @@ func applyEffectivePodProtections(d *DefaultEvictor, podProtections []PodProtect applyFailedBarePodsProtection(d, protectionMap) applyLocalStoragePodsProtection(d, protectionMap) applyDaemonSetPodsProtection(d, protectionMap) - applyPvcPodsProtection(d, protectionMap) + applyPVCPodsProtection(d, protectionMap) applyPodsWithoutPDBProtection(d, protectionMap, handle) applyPodsWithResourceClaimsProtection(d, protectionMap) return nil } +// protectedPVCStorageClasses returns the list of storage classes that should +// be protected from eviction. If the list is empty or nil then all storage +// classes are protected (assuming PodsWithPVC protection is enabled). +func protectedPVCStorageClasses(d *DefaultEvictor) []ProtectedStorageClass { + protcfg := d.args.PodProtections.Config + if protcfg == nil { + return nil + } + scconfig := protcfg.PodsWithPVC + if scconfig == nil { + return nil + } + return scconfig.ProtectedStorageClasses +} + +// podStorageClasses returns a list of storage classes referred by a pod. We +// need this when assessing if a pod should be protected because it refers to a +// protected storage class. +func podStorageClasses(inf informers.SharedInformerFactory, pod *v1.Pod) ([]string, error) { + lister := inf.Core().V1().PersistentVolumeClaims().Lister().PersistentVolumeClaims( + pod.Namespace, + ) + + referred := map[string]bool{} + for _, vol := range pod.Spec.Volumes { + if vol.PersistentVolumeClaim == nil { + continue + } + + claim, err := lister.Get(vol.PersistentVolumeClaim.ClaimName) + if err != nil { + return nil, fmt.Errorf( + "failed to get persistent volume claim %q/%q: %w", + pod.Namespace, vol.PersistentVolumeClaim.ClaimName, err, + ) + } + + // this should never happen as once a pvc is created with a nil + // storageClass it is automatically picked up by the default + // storage class. By returning an error here we make the pod + // protected from eviction. + if claim.Spec.StorageClassName == nil || *claim.Spec.StorageClassName == "" { + return nil, fmt.Errorf( + "failed to resolve storage class for pod %q/%q", + pod.Namespace, claim.Name, + ) + } + + referred[*claim.Spec.StorageClassName] = true + } + + return slices.Collect(maps.Keys(referred)), nil +} + func applyFailedBarePodsProtection(d *DefaultEvictor, protectionMap map[PodProtection]bool) { isProtectionEnabled := protectionMap[FailedBarePods] if !isProtectionEnabled { @@ -206,16 +262,50 @@ func applyDaemonSetPodsProtection(d *DefaultEvictor, protectionMap map[PodProtec } } -func applyPvcPodsProtection(d *DefaultEvictor, protectionMap map[PodProtection]bool) { - isProtectionEnabled := protectionMap[PodsWithPVC] - if isProtectionEnabled { - d.constraints = append(d.constraints, func(pod *v1.Pod) error { - if utils.IsPodWithPVC(pod) { - return fmt.Errorf("pod with PVC is protected against eviction") +// applyPVCPodsProtection protects pods that refer to a PVC from eviction. If +// the user has specified a list of storage classes to protect then only pods +// referring to PVCs of those storage classes are protected. +func applyPVCPodsProtection(d *DefaultEvictor, enabledProtections map[PodProtection]bool) { + if !enabledProtections[PodsWithPVC] { + return + } + + // if the user isn't filtering by storage classes we protect all pods + // referring to a PVC. + protected := protectedPVCStorageClasses(d) + if len(protected) == 0 { + d.constraints = append( + d.constraints, + func(pod *v1.Pod) error { + if utils.IsPodWithPVC(pod) { + return fmt.Errorf("pod with PVC is protected against eviction") + } + return nil + }, + ) + return + } + + protectedsc := map[string]bool{} + for _, class := range protected { + protectedsc[class.Name] = true + } + + d.constraints = append( + d.constraints, func(pod *v1.Pod) error { + classes, err := podStorageClasses(d.handle.SharedInformerFactory(), pod) + if err != nil { + return err + } + for _, class := range classes { + if !protectedsc[class] { + continue + } + return fmt.Errorf("pod using protected storage class %q", class) } return nil - }) - } + }, + ) } func applyPodsWithoutPDBProtection(d *DefaultEvictor, protectionMap map[PodProtection]bool, handle frameworktypes.Handle) { diff --git a/pkg/framework/plugins/defaultevictor/defaultevictor_test.go b/pkg/framework/plugins/defaultevictor/defaultevictor_test.go index 0f2484788..61dd90e57 100644 --- a/pkg/framework/plugins/defaultevictor/defaultevictor_test.go +++ b/pkg/framework/plugins/defaultevictor/defaultevictor_test.go @@ -16,6 +16,7 @@ package defaultevictor import ( "context" "fmt" + "reflect" "slices" "testing" "time" @@ -28,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" + "k8s.io/klog/v2" "k8s.io/utils/ptr" "sigs.k8s.io/descheduler/pkg/api" evictionutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" @@ -55,6 +57,7 @@ type testCase struct { ignorePodsWithoutPDB bool podProtections PodProtections noEvictionPolicy NoEvictionPolicy + pvcs []*v1.PersistentVolumeClaim } func TestDefaultEvictorPreEvictionFilter(t *testing.T) { @@ -879,6 +882,144 @@ func TestDefaultEvictorFilter(t *testing.T) { }, result: false, }, + { + description: "Pod using StorageClass is not evicted because 'PodsWithPVC' is in ExtraEnabled", + pods: []*v1.Pod{ + test.BuildTestPod("p23", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "foo", + }, + }, + }, + } + }), + }, + podProtections: PodProtections{ + ExtraEnabled: []PodProtection{PodsWithPVC}, + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + { + Name: "standard", + }, + }, + }, + }, + }, + pvcs: []*v1.PersistentVolumeClaim{ + test.BuildTestPVC("foo", "standard"), + }, + result: false, + }, + { + description: "Pod using unprotected StorageClass is evicted even though 'PodsWithPVC' is in ExtraEnabled", + pods: []*v1.Pod{ + test.BuildTestPod("p24", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "foo", + }, + }, + }, + } + }), + }, + podProtections: PodProtections{ + ExtraEnabled: []PodProtection{PodsWithPVC}, + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + { + Name: "protected", + }, + }, + }, + }, + }, + pvcs: []*v1.PersistentVolumeClaim{ + test.BuildTestPVC("foo", "unprotected"), + }, + result: true, + }, + { + description: "Pod using unexisting PVC is not evicted because we cannot determine if storage class is protected or not", + pods: []*v1.Pod{ + test.BuildTestPod("p25", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "foo", + }, + }, + }, + } + }), + }, + podProtections: PodProtections{ + ExtraEnabled: []PodProtection{PodsWithPVC}, + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + { + Name: "protected", + }, + }, + }, + }, + }, + pvcs: []*v1.PersistentVolumeClaim{}, + result: false, + }, + { + description: "Pod using protected and unprotected StorageClasses is not evicted", + pods: []*v1.Pod{ + test.BuildTestPod("p26", 400, 0, n1.Name, func(pod *v1.Pod) { + pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod.Spec.Volumes = []v1.Volume{ + { + Name: "protected-pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "protected", + }, + }, + }, + { + Name: "unprotected-pvc", VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "unprotected", + }, + }, + }, + } + }), + }, + podProtections: PodProtections{ + ExtraEnabled: []PodProtection{PodsWithPVC}, + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + { + Name: "protected", + }, + }, + }, + }, + }, + pvcs: []*v1.PersistentVolumeClaim{ + test.BuildTestPVC("protected", "protected"), + test.BuildTestPVC("unprotected", "unprotected"), + }, + result: false, + }, } for _, test := range testCases { @@ -953,12 +1094,16 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin for _, pdb := range test.pdbs { objs = append(objs, pdb) } + for _, pvc := range test.pvcs { + objs = append(objs, pvc) + } fakeClient := fake.NewSimpleClientset(objs...) sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) podInformer := sharedInformerFactory.Core().V1().Pods().Informer() _ = sharedInformerFactory.Policy().V1().PodDisruptionBudgets().Lister() + _ = sharedInformerFactory.Core().V1().PersistentVolumeClaims().Lister() getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer) if err != nil { @@ -1117,3 +1262,58 @@ func slicesEqualUnordered(expected, actual []PodProtection) bool { } return true } + +func Test_protectedPVCStorageClasses(t *testing.T) { + tests := []struct { + name string + args *DefaultEvictorArgs + expected []ProtectedStorageClass + }{ + { + name: "no PodProtections config", + args: &DefaultEvictorArgs{}, + expected: nil, + }, + { + name: "no PodsWithPVC config", + args: &DefaultEvictorArgs{ + PodProtections: PodProtections{ + Config: &PodProtectionsConfig{}, + }, + }, + expected: nil, + }, + { + name: "storage classes specified", + args: &DefaultEvictorArgs{ + PodProtections: PodProtections{ + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + {Name: "sc1"}, + {Name: "sc2"}, + }, + }, + }, + }, + }, + expected: []ProtectedStorageClass{ + {Name: "sc1"}, + {Name: "sc2"}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ev := &DefaultEvictor{ + logger: klog.NewKlogr(), + args: test.args, + } + result := protectedPVCStorageClasses(ev) + if !reflect.DeepEqual(result, test.expected) { + t.Errorf("Expected %v, got %v", test.expected, result) + } + }) + } +} diff --git a/pkg/framework/plugins/defaultevictor/types.go b/pkg/framework/plugins/defaultevictor/types.go index 7c6600d47..2403c939a 100644 --- a/pkg/framework/plugins/defaultevictor/types.go +++ b/pkg/framework/plugins/defaultevictor/types.go @@ -75,6 +75,37 @@ type PodProtections struct { // DefaultDisabled specifies which default protection policies should be disabled. // Supports: PodsWithLocalStorage, DaemonSetPods, SystemCriticalPods, FailedBarePods DefaultDisabled []PodProtection `json:"defaultDisabled,omitempty"` + + // Config holds configuration for pod protection policies. Depending on + // the enabled policies this may be required. For instance, when + // enabling the PodsWithPVC policy the user may specify which storage + // classes should be protected. + Config *PodProtectionsConfig `json:"config,omitempty"` +} + +// PodProtectionsConfig holds configuration for pod protection policies. The +// name of the fields here must be equal to a protection name. This struct is +// meant to be extended as more protection policies are added. +// +k8s:deepcopy-gen=true +type PodProtectionsConfig struct { + PodsWithPVC *PodsWithPVCConfig `json:"PodsWithPVC,omitempty"` +} + +// PodsWithPVCConfig holds configuration for the PodsWithPVC protection. +// +k8s:deepcopy-gen=true +type PodsWithPVCConfig struct { + // ProtectedStorageClasses is a list of storage classes that we want to + // protect. i.e. if a pod refers to one of these storage classes it is + // protected from being evicted. If none is provided then all pods with + // PVCs are protected from eviction. + ProtectedStorageClasses []ProtectedStorageClass `json:"protectedStorageClasses,omitempty"` +} + +// ProtectedStorageClass is used to determine what storage classes are +// protected when the PodsWithPVC protection is enabled. This object exists +// so we can later on extend it with more configuration if needed. +type ProtectedStorageClass struct { + Name string `json:"name"` } // defaultPodProtections holds the list of protection policies that are enabled by default. diff --git a/pkg/framework/plugins/defaultevictor/validation.go b/pkg/framework/plugins/defaultevictor/validation.go index 6dac513eb..28f2231ff 100644 --- a/pkg/framework/plugins/defaultevictor/validation.go +++ b/pkg/framework/plugins/defaultevictor/validation.go @@ -72,6 +72,17 @@ func ValidateDefaultEvictorArgs(obj runtime.Object) error { if hasDuplicates(args.PodProtections.ExtraEnabled) { allErrs = append(allErrs, fmt.Errorf("PodProtections.ExtraEnabled contains duplicate entries")) } + + if slices.Contains(args.PodProtections.ExtraEnabled, PodsWithPVC) { + if args.PodProtections.Config != nil && args.PodProtections.Config.PodsWithPVC != nil { + protectedsc := args.PodProtections.Config.PodsWithPVC.ProtectedStorageClasses + for i, sc := range protectedsc { + if sc.Name == "" { + allErrs = append(allErrs, fmt.Errorf("PodProtections.Config.PodsWithPVC.ProtectedStorageClasses[%d] name cannot be empty", i)) + } + } + } + } } return utilerrors.NewAggregate(allErrs) diff --git a/pkg/framework/plugins/defaultevictor/validation_test.go b/pkg/framework/plugins/defaultevictor/validation_test.go index ae8c8c120..ab78b4e19 100644 --- a/pkg/framework/plugins/defaultevictor/validation_test.go +++ b/pkg/framework/plugins/defaultevictor/validation_test.go @@ -198,6 +198,33 @@ func TestValidateDefaultEvictorArgs(t *testing.T) { }, errInfo: fmt.Errorf(`[noEvictionPolicy accepts only ["Preferred" "Mandatory"] values, invalid pod protection policy in DefaultDisabled: "PodsWithoutPDB". Valid options are: [PodsWithLocalStorage SystemCriticalPods FailedBarePods DaemonSetPods], PodProtections.DefaultDisabled contains duplicate entries, PodProtections.ExtraEnabled contains duplicate entries]`), }, + { + name: "Protected storage classes without storage class name", + args: &DefaultEvictorArgs{ + PodProtections: PodProtections{ + ExtraEnabled: []PodProtection{PodsWithPVC}, + Config: &PodProtectionsConfig{ + PodsWithPVC: &PodsWithPVCConfig{ + ProtectedStorageClasses: []ProtectedStorageClass{ + { + Name: "", + }, + { + Name: "protected-storage-class-0", + }, + { + Name: "", + }, + { + Name: "protected-storage-class-1", + }, + }, + }, + }, + }, + }, + errInfo: fmt.Errorf(`[PodProtections.Config.PodsWithPVC.ProtectedStorageClasses[0] name cannot be empty, PodProtections.Config.PodsWithPVC.ProtectedStorageClasses[2] name cannot be empty]`), + }, } for _, testCase := range tests { diff --git a/pkg/framework/plugins/defaultevictor/zz_generated.deepcopy.go b/pkg/framework/plugins/defaultevictor/zz_generated.deepcopy.go index 9faf7c10e..84199183a 100644 --- a/pkg/framework/plugins/defaultevictor/zz_generated.deepcopy.go +++ b/pkg/framework/plugins/defaultevictor/zz_generated.deepcopy.go @@ -81,6 +81,11 @@ func (in *PodProtections) DeepCopyInto(out *PodProtections) { *out = make([]PodProtection, len(*in)) copy(*out, *in) } + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(PodProtectionsConfig) + (*in).DeepCopyInto(*out) + } return } @@ -93,3 +98,45 @@ func (in *PodProtections) DeepCopy() *PodProtections { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PodProtectionsConfig) DeepCopyInto(out *PodProtectionsConfig) { + *out = *in + if in.PodsWithPVC != nil { + in, out := &in.PodsWithPVC, &out.PodsWithPVC + *out = new(PodsWithPVCConfig) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodProtectionsConfig. +func (in *PodProtectionsConfig) DeepCopy() *PodProtectionsConfig { + if in == nil { + return nil + } + out := new(PodProtectionsConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PodsWithPVCConfig) DeepCopyInto(out *PodsWithPVCConfig) { + *out = *in + if in.ProtectedStorageClasses != nil { + in, out := &in.ProtectedStorageClasses, &out.ProtectedStorageClasses + *out = make([]ProtectedStorageClass, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodsWithPVCConfig. +func (in *PodsWithPVCConfig) DeepCopy() *PodsWithPVCConfig { + if in == nil { + return nil + } + out := new(PodsWithPVCConfig) + in.DeepCopyInto(out) + return out +} diff --git a/test/e2e/e2e_podswithpvc_test.go b/test/e2e/e2e_podswithpvc_test.go new file mode 100644 index 000000000..d149964fa --- /dev/null +++ b/test/e2e/e2e_podswithpvc_test.go @@ -0,0 +1,373 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/component-base/config" + "k8s.io/utils/ptr" + + storagev1 "k8s.io/api/storage/v1" + "sigs.k8s.io/descheduler/cmd/descheduler/app/options" + "sigs.k8s.io/descheduler/pkg/api" + apiv1alpha2 "sigs.k8s.io/descheduler/pkg/api/v1alpha2" + "sigs.k8s.io/descheduler/pkg/descheduler/client" + "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" + "sigs.k8s.io/descheduler/pkg/framework/plugins/removepodshavingtoomanyrestarts" +) + +// protectPodsWithPVCPolicy returns a descheduler policy that protects pods +// using PVCs of specific storage classes from eviction while, at the same +// time, evicting pods that have restarted more than 3 times. +func protectPodsWithPVCPolicy(namespace string, protectedsc []defaultevictor.ProtectedStorageClass) *apiv1alpha2.DeschedulerPolicy { + return &apiv1alpha2.DeschedulerPolicy{ + Profiles: []apiv1alpha2.DeschedulerProfile{ + { + Name: "ProtectPodsWithPVCPolicy", + PluginConfigs: []apiv1alpha2.PluginConfig{ + { + Name: removepodshavingtoomanyrestarts.PluginName, + Args: runtime.RawExtension{ + Object: &removepodshavingtoomanyrestarts.RemovePodsHavingTooManyRestartsArgs{ + PodRestartThreshold: 3, + IncludingInitContainers: true, + Namespaces: &api.Namespaces{ + Include: []string{namespace}, + }, + }, + }, + }, + { + Name: defaultevictor.PluginName, + Args: runtime.RawExtension{ + Object: &defaultevictor.DefaultEvictorArgs{ + PodProtections: defaultevictor.PodProtections{ + DefaultDisabled: []defaultevictor.PodProtection{ + defaultevictor.PodsWithLocalStorage, + }, + ExtraEnabled: []defaultevictor.PodProtection{ + defaultevictor.PodsWithPVC, + }, + Config: &defaultevictor.PodProtectionsConfig{ + PodsWithPVC: &defaultevictor.PodsWithPVCConfig{ + ProtectedStorageClasses: protectedsc, + }, + }, + }, + }, + }, + }, + }, + Plugins: apiv1alpha2.Plugins{ + Filter: apiv1alpha2.PluginSet{ + Enabled: []string{ + defaultevictor.PluginName, + }, + }, + Deschedule: apiv1alpha2.PluginSet{ + Enabled: []string{ + removepodshavingtoomanyrestarts.PluginName, + }, + }, + }, + }, + }, + } +} + +// TestProtectPodsWithPVC tests that pods using PVCs are protected. +func TestProtectPodsWithPVC(t *testing.T) { + ctx := context.Background() + initPluginRegistry() + + cli, err := client.CreateClient( + config.ClientConnectionConfiguration{ + Kubeconfig: os.Getenv("KUBECONFIG"), + }, "", + ) + if err != nil { + t.Fatalf("error during kubernetes client creation with %v", err) + } + + // start by finding out what is the default storage class in the + // cluster. if none is found then this test can't run. + scs, err := cli.StorageV1().StorageClasses().List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("error listing storage classes: %v", err) + } + + var defclass *storagev1.StorageClass + for _, sc := range scs.Items { + if _, ok := sc.Annotations["storageclass.kubernetes.io/is-default-class"]; ok { + defclass = &sc + break + } + } + if defclass == nil { + t.Fatalf("no default storage class found, unable to run the test") + } + + // now we replicate the default storage class so we have two different + // storage classes in the cluster. this is useful to test protected vs + // unprotected pods using PVCs. + unprotectedsc := defclass.DeepCopy() + delete(unprotectedsc.Annotations, "storageclass.kubernetes.io/is-default-class") + unprotectedsc.ResourceVersion = "" + unprotectedsc.Name = "unprotected" + if _, err = cli.StorageV1().StorageClasses().Create(ctx, unprotectedsc, metav1.CreateOptions{}); err != nil { + t.Fatalf("error creating unprotected storage class: %v", err) + } + defer cli.StorageV1().StorageClasses().Delete(ctx, unprotectedsc.Name, metav1.DeleteOptions{}) + + // this is the namespace we are going to use for all testing + t.Logf("creating testing namespace %v", t.Name()) + namespace := &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("e2e-%s", strings.ToLower(t.Name())), + }, + } + if _, err := cli.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", namespace.Name) + } + defer cli.CoreV1().Namespaces().Delete(ctx, namespace.Name, metav1.DeleteOptions{}) + + for _, tc := range []struct { + name string + policy *apiv1alpha2.DeschedulerPolicy + enableGracePeriod bool + expectedEvictedPodCount uint + pvcs []*v1.PersistentVolumeClaim + volumes []v1.Volume + }{ + { + name: "evict pods from unprotected storage class", + policy: protectPodsWithPVCPolicy( + namespace.Name, []defaultevictor.ProtectedStorageClass{ + { + Name: defclass.Name, + }, + }, + ), + expectedEvictedPodCount: 4, + pvcs: []*v1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-unprotected-claim", + Namespace: namespace.Name, + }, + Spec: v1.PersistentVolumeClaimSpec{ + StorageClassName: ptr.To(unprotectedsc.Name), + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.VolumeResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + volumes: []v1.Volume{ + { + Name: "test-unprotected-volume", + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "test-unprotected-claim", + }, + }, + }, + }, + }, + { + name: "preserve pods from protected storage class", + policy: protectPodsWithPVCPolicy( + namespace.Name, []defaultevictor.ProtectedStorageClass{ + { + Name: defclass.Name, + }, + }, + ), + expectedEvictedPodCount: 0, + pvcs: []*v1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-protected-claim", + Namespace: namespace.Name, + }, + Spec: v1.PersistentVolumeClaimSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + Resources: v1.VolumeResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + volumes: []v1.Volume{ + { + Name: "test-protected-volume", + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "test-protected-claim", + }, + }, + }, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + t.Logf("creating testing pvcs in namespace %v", namespace.Name) + for _, pvc := range tc.pvcs { + if _, err = cli.CoreV1().PersistentVolumeClaims(namespace.Name).Create(ctx, pvc, metav1.CreateOptions{}); err != nil { + t.Fatalf("error creating PVC: %v", err) + } + defer cli.CoreV1().PersistentVolumeClaims(namespace.Name).Delete(ctx, pvc.Name, metav1.DeleteOptions{}) + } + + deploy := buildTestDeployment( + "restart-pod", + namespace.Name, + 4, + map[string]string{"test": "restart-pod", "name": "test-toomanyrestarts"}, + func(deployment *appsv1.Deployment) { + deployment.Spec.Template.Spec.Containers[0].Command = []string{"/bin/sh"} + deployment.Spec.Template.Spec.Containers[0].Args = []string{"-c", "sleep 1s && exit 1"} + }, + ) + deploy.Spec.Template.Spec.Volumes = tc.volumes + + t.Logf("creating deployment %v", deploy.Name) + if _, err := cli.AppsV1().Deployments(deploy.Namespace).Create(ctx, deploy, metav1.CreateOptions{}); err != nil { + t.Fatalf("error creating deployment: %v", err) + } + defer cli.AppsV1().Deployments(deploy.Namespace).Delete(ctx, deploy.Name, metav1.DeleteOptions{}) + + // wait for 3 restarts + waitPodRestartCount(ctx, cli, namespace.Name, t, 3) + + rs, err := options.NewDeschedulerServer() + if err != nil { + t.Fatalf("unable to initialize server: %v\n", err) + } + rs.Client, rs.EventClient, rs.DefaultFeatureGates = cli, cli, initFeatureGates() + preRunNames := sets.NewString(getCurrentPodNames(ctx, cli, namespace.Name, t)...) + + // deploy the descheduler with the configured policy + policycm, err := deschedulerPolicyConfigMap(tc.policy) + if err != nil { + t.Fatalf("Error creating %q CM: %v", policycm.Name, err) + } + + t.Logf("creating %q policy CM with PodsWithPVC protection enabled...", policycm.Name) + if _, err = cli.CoreV1().ConfigMaps(policycm.Namespace).Create( + ctx, policycm, metav1.CreateOptions{}, + ); err != nil { + t.Fatalf("error creating %q CM: %v", policycm.Name, err) + } + + defer func() { + t.Logf("deleting %q CM...", policycm.Name) + if err := cli.CoreV1().ConfigMaps(policycm.Namespace).Delete( + ctx, policycm.Name, metav1.DeleteOptions{}, + ); err != nil { + t.Fatalf("unable to delete %q CM: %v", policycm.Name, err) + } + }() + + desdep := deschedulerDeployment(namespace.Name) + t.Logf("creating descheduler deployment %v", desdep.Name) + if _, err := cli.AppsV1().Deployments(desdep.Namespace).Create( + ctx, desdep, metav1.CreateOptions{}, + ); err != nil { + t.Fatalf("error creating %q deployment: %v", desdep.Name, err) + } + + deschedulerPodName := "" + defer func() { + if deschedulerPodName != "" { + printPodLogs(ctx, t, cli, deschedulerPodName) + } + + t.Logf("deleting %q deployment...", desdep.Name) + if err := cli.AppsV1().Deployments(desdep.Namespace).Delete( + ctx, desdep.Name, metav1.DeleteOptions{}, + ); err != nil { + t.Fatalf("unable to delete %q deployment: %v", desdep.Name, err) + } + + waitForPodsToDisappear(ctx, t, cli, desdep.Labels, desdep.Namespace) + }() + + t.Logf("waiting for the descheduler pod running") + deschedulerPods := waitForPodsRunning(ctx, t, cli, desdep.Labels, 1, desdep.Namespace) + if len(deschedulerPods) != 0 { + deschedulerPodName = deschedulerPods[0].Name + } + + if err := wait.PollUntilContextTimeout( + ctx, 5*time.Second, time.Minute, true, + func(ctx context.Context) (bool, error) { + podList, err := cli.CoreV1().Pods(namespace.Name).List( + ctx, metav1.ListOptions{}, + ) + if err != nil { + t.Fatalf("error listing pods: %v", err) + } + + names := []string{} + for _, item := range podList.Items { + names = append(names, item.Name) + } + + currentRunNames := sets.NewString(names...) + actualEvictedPod := preRunNames.Difference(currentRunNames) + actualEvictedPodCount := uint(actualEvictedPod.Len()) + if actualEvictedPodCount < tc.expectedEvictedPodCount { + t.Logf( + "expecting %v number of pods evicted, got %v instead", + tc.expectedEvictedPodCount, actualEvictedPodCount, + ) + return false, nil + } + + return true, nil + }, + ); err != nil { + t.Fatalf("error waiting for descheduler running: %v", err) + } + + waitForTerminatingPodsToDisappear(ctx, t, cli, namespace.Name) + }) + } +} diff --git a/test/test_utils.go b/test/test_utils.go index 5a3169dd1..36f8e21f0 100644 --- a/test/test_utils.go +++ b/test/test_utils.go @@ -89,6 +89,25 @@ func BuildTestPDB(name, appLabel string) *policyv1.PodDisruptionBudget { return pdb } +func BuildTestPVC(name, storageClass string) *v1.PersistentVolumeClaim { + pvc := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: name, + }, + Spec: v1.PersistentVolumeClaimSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}, + StorageClassName: &storageClass, + Resources: v1.VolumeResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + } + return pvc +} + // BuildPodMetrics creates a test podmetrics with given parameters. func BuildPodMetrics(name string, millicpu, mem int64) *v1beta1.PodMetrics { return &v1beta1.PodMetrics{