diff --git a/README.md b/README.md
index c94f2243c..989fb4981 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ Table of Contents
   - [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint)
   - [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts)
   - [PodLifeTime](#podlifetime)
+  - [RemoveFailedPods](#removefailedpods)
 - [Filter Pods](#filter-pods)
   - [Namespace filtering](#namespace-filtering)
   - [Priority filtering](#priority-filtering)
@@ -517,6 +518,47 @@ strategies:
         - "Pending"
 ```
 
+### RemoveFailedPods
+
+This strategy evicts pods that are in the `Failed` status phase.
+You can provide an optional parameter to filter by failed `reasons`.
+`reasons` can be expanded to include the reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
+You can specify an optional parameter `minPodLifetimeSeconds` to evict only pods that are older than the given number of seconds.
+Lastly, you can specify the optional parameter `excludeOwnerKinds`; if a pod
+has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
+
+**Parameters:**
+
+|Name|Type|
+|---|---|
+|`minPodLifetimeSeconds`|uint|
+|`excludeOwnerKinds`|list(string)|
+|`reasons`|list(string)|
+|`includingInitContainers`|bool|
+|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
+|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
+|`namespaces`|(see [namespace filtering](#namespace-filtering))|
+|`labelSelector`|(see [label filtering](#label-filtering))|
+|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
+
+**Example:**
+
+```yaml
+apiVersion: "descheduler/v1alpha1"
+kind: "DeschedulerPolicy"
+strategies:
+  "RemoveFailedPods":
+    enabled: true
+    params:
+      failedPods:
+        reasons:
+        - "NodeAffinity"
+        includingInitContainers: true
+        excludeOwnerKinds:
+        - "Job"
+        minPodLifetimeSeconds: 3600
+```
+
 ## Filter Pods
 
 ### Namespace filtering
@@ -529,6 +571,7 @@ The following strategies accept a `namespaces` parameter which allows to specify
 * `RemovePodsViolatingInterPodAntiAffinity`
 * `RemoveDuplicates`
 * `RemovePodsViolatingTopologySpreadConstraint`
+* `RemoveFailedPods`
 
 For example:
@@ -620,6 +663,7 @@ to filter pods by their labels:
 * `RemovePodsViolatingNodeAffinity`
 * `RemovePodsViolatingInterPodAntiAffinity`
 * `RemovePodsViolatingTopologySpreadConstraint`
+* `RemoveFailedPods`
 
 This allows running strategies among pods the descheduler is interested in.
@@ -654,6 +698,7 @@ The following strategies accept a `nodeFit` boolean parameter which can optimize
 * `RemovePodsViolatingNodeTaints`
 * `RemovePodsViolatingTopologySpreadConstraint`
 * `RemovePodsHavingTooManyRestarts`
+* `RemoveFailedPods`
 
 If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted.
 Currently the following criteria are considered when setting `nodeFit` to `true`:
 - A `nodeSelector` on the pod
diff --git a/examples/failed-pods.yaml b/examples/failed-pods.yaml
new file mode 100644
index 000000000..a7ca36c9a
--- /dev/null
+++ b/examples/failed-pods.yaml
@@ -0,0 +1,14 @@
+apiVersion: "descheduler/v1alpha1"
+kind: "DeschedulerPolicy"
+strategies:
+  "RemoveFailedPods":
+    enabled: true
+    params:
+      failedPods:
+        reasons:
+        - "OutOfcpu"
+        - "CreateContainerConfigError"
+        includingInitContainers: true
+        excludeOwnerKinds:
+        - "Job"
+        minPodLifetimeSeconds: 3600 # 1 hour
diff --git a/pkg/api/types.go b/pkg/api/types.go
index afb2acbc3..af2c063fb 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -75,6 +75,7 @@ type StrategyParameters struct {
 	PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
 	PodLifeTime               *PodLifeTime
 	RemoveDuplicates          *RemoveDuplicates
+	FailedPods                *FailedPods
 	IncludeSoftConstraints    bool
 	Namespaces                *Namespaces
 	ThresholdPriority         *int32
@@ -105,3 +106,10 @@ type PodLifeTime struct {
 	MaxPodLifeTimeSeconds *uint
 	PodStatusPhases       []string
 }
+
+type FailedPods struct {
+	ExcludeOwnerKinds       []string
+	MinPodLifetimeSeconds   *uint
+	Reasons                 []string
+	IncludingInitContainers bool
+}
diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go
index f18a5ee2a..700afdaa6 100644
--- a/pkg/api/v1alpha1/types.go
+++ b/pkg/api/v1alpha1/types.go
@@ -73,6 +73,7 @@ type StrategyParameters struct {
 	PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
 	PodLifeTime               *PodLifeTime               `json:"podLifeTime,omitempty"`
 	RemoveDuplicates          *RemoveDuplicates          `json:"removeDuplicates,omitempty"`
+	FailedPods                *FailedPods                `json:"failedPods,omitempty"`
 	IncludeSoftConstraints    bool                       `json:"includeSoftConstraints"`
 	Namespaces                *Namespaces                `json:"namespaces"`
 	ThresholdPriority         *int32                     `json:"thresholdPriority"`
@@ -103,3 +104,10 @@ type PodLifeTime struct {
 	MaxPodLifeTimeSeconds *uint    `json:"maxPodLifeTimeSeconds,omitempty"`
 	PodStatusPhases       []string `json:"podStatusPhases,omitempty"`
 }
+
+type FailedPods struct {
+	ExcludeOwnerKinds       []string `json:"excludeOwnerKinds,omitempty"`
+	MinPodLifetimeSeconds   *uint    `json:"minPodLifetimeSeconds,omitempty"`
+	Reasons                 []string `json:"reasons,omitempty"`
+	IncludingInitContainers bool     `json:"includingInitContainers,omitempty"`
+}
diff --git a/pkg/api/v1alpha1/zz_generated.conversion.go b/pkg/api/v1alpha1/zz_generated.conversion.go
index 1b91f8f50..4f071f2bc 100644
--- a/pkg/api/v1alpha1/zz_generated.conversion.go
+++ b/pkg/api/v1alpha1/zz_generated.conversion.go
@@ -56,6 +56,16 @@ func RegisterConversions(s *runtime.Scheme) error {
 	}); err != nil {
 		return err
 	}
+	if err := s.AddGeneratedConversionFunc((*FailedPods)(nil), (*api.FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
+		return Convert_v1alpha1_FailedPods_To_api_FailedPods(a.(*FailedPods), b.(*api.FailedPods), scope)
+	}); err != nil {
+		return err
+	}
+	if err := s.AddGeneratedConversionFunc((*api.FailedPods)(nil), (*FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
+		return Convert_api_FailedPods_To_v1alpha1_FailedPods(a.(*api.FailedPods), b.(*FailedPods), scope)
+	}); err != nil {
+		return err
+	}
 	if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
 		return Convert_v1alpha1_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope)
 	}); err != nil {
@@ -173,6 +183,32 @@ func Convert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.Des
 	return autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in, out, s)
 }
 
+func autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
+	out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
+	out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
+	out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
+	out.IncludingInitContainers = in.IncludingInitContainers
+	return nil
+}
+
+// Convert_v1alpha1_FailedPods_To_api_FailedPods is an autogenerated conversion function.
+func Convert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
+	return autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in, out, s)
+}
+
+func autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
+	out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
+	out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
+	out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
+	out.IncludingInitContainers = in.IncludingInitContainers
+	return nil
+}
+
+// Convert_api_FailedPods_To_v1alpha1_FailedPods is an autogenerated conversion function.
+func Convert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
+	return autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in, out, s)
+}
+
 func autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
 	out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
 	out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
@@ -289,6 +325,7 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
 	out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
 	out.PodLifeTime = (*api.PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
 	out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
+	out.FailedPods = (*api.FailedPods)(unsafe.Pointer(in.FailedPods))
 	out.IncludeSoftConstraints = in.IncludeSoftConstraints
 	out.Namespaces = (*api.Namespaces)(unsafe.Pointer(in.Namespaces))
 	out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
@@ -309,6 +346,7 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
 	out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
 	out.PodLifeTime = (*PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
 	out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
+	out.FailedPods = (*FailedPods)(unsafe.Pointer(in.FailedPods))
 	out.IncludeSoftConstraints = in.IncludeSoftConstraints
 	out.Namespaces = (*Namespaces)(unsafe.Pointer(in.Namespaces))
 	out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go
index b31b5a1a6..5aef14c46 100644
--- a/pkg/api/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go
@@ -103,6 +103,37 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FailedPods) DeepCopyInto(out *FailedPods) {
+	*out = *in
+	if in.ExcludeOwnerKinds != nil {
+		in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.MinPodLifetimeSeconds != nil {
+		in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
+		*out = new(uint)
+		**out = **in
+	}
+	if in.Reasons != nil {
+		in, out := &in.Reasons, &out.Reasons
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
+func (in *FailedPods) DeepCopy() *FailedPods {
+	if in == nil {
+		return nil
+	}
+	out := new(FailedPods)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Namespaces) DeepCopyInto(out *Namespaces) {
 	*out = *in
@@ -294,6 +325,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
 		*out = new(RemoveDuplicates)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.FailedPods != nil {
+		in, out := &in.FailedPods, &out.FailedPods
+		*out = new(FailedPods)
+		(*in).DeepCopyInto(*out)
+	}
 	if in.Namespaces != nil {
 		in, out := &in.Namespaces, &out.Namespaces
 		*out = new(Namespaces)
diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go
index a6fe8557e..fa453fca3 100644
--- a/pkg/api/zz_generated.deepcopy.go
+++ b/pkg/api/zz_generated.deepcopy.go
@@ -103,6 +103,37 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FailedPods) DeepCopyInto(out *FailedPods) {
+	*out = *in
+	if in.ExcludeOwnerKinds != nil {
+		in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.MinPodLifetimeSeconds != nil {
+		in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
+		*out = new(uint)
+		**out = **in
+	}
+	if in.Reasons != nil {
+		in, out := &in.Reasons, &out.Reasons
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
+func (in *FailedPods) DeepCopy() *FailedPods {
+	if in == nil {
+		return nil
+	}
+	out := new(FailedPods)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Namespaces) DeepCopyInto(out *Namespaces) {
 	*out = *in
@@ -294,6 +325,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
 		*out = new(RemoveDuplicates)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.FailedPods != nil {
+		in, out := &in.FailedPods, &out.FailedPods
+		*out = new(FailedPods)
+		(*in).DeepCopyInto(*out)
+	}
 	if in.Namespaces != nil {
 		in, out := &in.Namespaces, &out.Namespaces
 		*out = new(Namespaces)
diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go
index 06bdefaf1..445c6b2f2 100644
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -83,6 +83,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 		"RemovePodsHavingTooManyRestarts":             strategies.RemovePodsHavingTooManyRestarts,
 		"PodLifeTime":                                 strategies.PodLifeTime,
 		"RemovePodsViolatingTopologySpreadConstraint": strategies.RemovePodsViolatingTopologySpreadConstraint,
+		"RemoveFailedPods":                            strategies.RemoveFailedPods,
 	}
 
 	nodeSelector := rs.NodeSelector
diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go
index db9790691..4b97ebfa0 100644
--- a/pkg/descheduler/pod/pods.go
+++ b/pkg/descheduler/pod/pods.go
@@ -72,6 +72,19 @@ func ListPodsOnANode(
 	client clientset.Interface,
 	node *v1.Node,
 	opts ...func(opts *Options),
+) ([]*v1.Pod, error) {
+	fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)
+
+	return ListPodsOnANodeWithFieldSelector(ctx, client, node, fieldSelectorString, opts...)
+}
+
+// ListPodsOnANodeWithFieldSelector lists all of the pods on a node matching the given field selector.
+func ListPodsOnANodeWithFieldSelector(
+	ctx context.Context,
+	client clientset.Interface,
+	node *v1.Node,
+	fieldSelectorString string,
+	opts ...func(opts *Options),
 ) ([]*v1.Pod, error) {
 	options := &Options{}
 	for _, opt := range opts {
@@ -80,8 +93,6 @@ func ListPodsOnANode(
 	pods := make([]*v1.Pod, 0)
 
-	fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)
-
 	labelSelectorString := ""
 	if options.labelSelector != nil {
 		selector, err := metav1.LabelSelectorAsSelector(options.labelSelector)
diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go
new file mode 100644
index 000000000..e9c7ad23c
--- /dev/null
+++ b/pkg/descheduler/strategies/failedpods.go
@@ -0,0 +1,163 @@
+package strategies
+
+import (
+	"context"
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
+	"k8s.io/apimachinery/pkg/util/sets"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/klog/v2"
+
+	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
+	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
+	"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
+)
+
+// validatedFailedPodsStrategyParams contains validated strategy parameters
+type validatedFailedPodsStrategyParams struct {
+	*validation.ValidatedStrategyParams
+	includingInitContainers bool
+	reasons                 sets.String
+	excludeOwnerKinds       sets.String
+	minPodLifetimeSeconds   *uint
+}
+
+// RemoveFailedPods removes Pods that are in the Failed status phase.
+func RemoveFailedPods(
+	ctx context.Context,
+	client clientset.Interface,
+	strategy api.DeschedulerStrategy,
+	nodes []*v1.Node,
+	podEvictor *evictions.PodEvictor,
+) {
+	strategyParams, err := validateAndParseRemoveFailedPodsParams(ctx, client, strategy.Params)
+	if err != nil {
+		klog.ErrorS(err, "Invalid RemoveFailedPods parameters")
+		return
+	}
+
+	evictable := podEvictor.Evictable(
+		evictions.WithPriorityThreshold(strategyParams.ThresholdPriority),
+		evictions.WithNodeFit(strategyParams.NodeFit),
+		evictions.WithLabelSelector(strategyParams.LabelSelector),
+	)
+
+	for _, node := range nodes {
+		klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
+		fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase=" + string(v1.PodFailed)
+		pods, err := podutil.ListPodsOnANodeWithFieldSelector(
+			ctx,
+			client,
+			node,
+			fieldSelectorString,
+			podutil.WithFilter(evictable.IsEvictable),
+			podutil.WithNamespaces(strategyParams.IncludedNamespaces.UnsortedList()),
+			podutil.WithoutNamespaces(strategyParams.ExcludedNamespaces.UnsortedList()),
+			podutil.WithLabelSelector(strategy.Params.LabelSelector),
+		)
+		if err != nil {
+			klog.ErrorS(err, "Error listing a node's failed pods", "node", klog.KObj(node))
+			continue
+		}
+
+		for i, pod := range pods {
+			if err = validateFailedPodShouldEvict(pod, *strategyParams); err != nil {
+				klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod))
+				continue
+			}
+
+			if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
+				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
+				break
+			}
+		}
+	}
+}
+
+func validateAndParseRemoveFailedPodsParams(
+	ctx context.Context,
+	client clientset.Interface,
+	params *api.StrategyParameters,
+) (*validatedFailedPodsStrategyParams, error) {
+	if params == nil {
+		return &validatedFailedPodsStrategyParams{}, nil
+	}
+
+	strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params)
+	if err != nil {
+		return nil, err
+	}
+
+	var reasons, excludeOwnerKinds sets.String
+	var includingInitContainers bool
+	var minPodLifetimeSeconds *uint
+	if params.FailedPods != nil {
+		reasons = sets.NewString(params.FailedPods.Reasons...)
+		includingInitContainers = params.FailedPods.IncludingInitContainers
+		excludeOwnerKinds = sets.NewString(params.FailedPods.ExcludeOwnerKinds...)
+		minPodLifetimeSeconds = params.FailedPods.MinPodLifetimeSeconds
+	}
+
+	return &validatedFailedPodsStrategyParams{
+		ValidatedStrategyParams: strategyParams,
+		includingInitContainers: includingInitContainers,
+		reasons:                 reasons,
+		excludeOwnerKinds:       excludeOwnerKinds,
+		minPodLifetimeSeconds:   minPodLifetimeSeconds,
+	}, nil
+}
+
+// validateFailedPodShouldEvict looks at strategy params settings to see if the Pod
+// should be evicted given the params in the FailedPods policy.
+func validateFailedPodShouldEvict(pod *v1.Pod, strategyParams validatedFailedPodsStrategyParams) error {
+	var errs []error
+
+	if strategyParams.minPodLifetimeSeconds != nil {
+		podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
+		if podAgeSeconds < *strategyParams.minPodLifetimeSeconds {
+			errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *strategyParams.minPodLifetimeSeconds))
+		}
+	}
+
+	if len(strategyParams.excludeOwnerKinds) > 0 {
+		ownerRefList := podutil.OwnerRef(pod)
+		for _, owner := range ownerRefList {
+			if strategyParams.excludeOwnerKinds.Has(owner.Kind) {
+				errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind))
+			}
+		}
+	}
+
+	if len(strategyParams.reasons) > 0 {
+		reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses)
+
+		if strategyParams.includingInitContainers {
+			reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...)
+		}
+
+		if !strategyParams.reasons.HasAny(reasons...) {
+			errs = append(errs, fmt.Errorf("pod does not match any of the reasons"))
+		}
+	}
+
+	return utilerrors.NewAggregate(errs)
+}
+
+func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string {
+	reasons := make([]string, 0)
+
+	for _, containerStatus := range containerStatuses {
+		if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" {
+			reasons = append(reasons, containerStatus.State.Waiting.Reason)
+		}
+		if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" {
+			reasons = append(reasons, containerStatus.State.Terminated.Reason)
+		}
+	}
+
+	return reasons
+}
diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/descheduler/strategies/failedpods_test.go
new file mode 100644
index 000000000..52f4f0574
--- /dev/null
+++ b/pkg/descheduler/strategies/failedpods_test.go
@@ -0,0 +1,274 @@
+package strategies
+
+import (
+	"context"
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	core "k8s.io/client-go/testing"
+	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
+	"sigs.k8s.io/descheduler/test"
+)
+
+var (
+	OneHourInSeconds uint = 3600
+)
+
+func TestRemoveFailedPods(t *testing.T) {
+	ctx := context.Background()
+
+	createStrategy := func(enabled, includingInitContainers bool, reasons, excludeKinds []string, minAgeSeconds *uint, nodeFit bool) api.DeschedulerStrategy {
+		return api.DeschedulerStrategy{
+			Enabled: enabled,
+			Params: &api.StrategyParameters{
+				FailedPods: &api.FailedPods{
+					Reasons:                 reasons,
+					IncludingInitContainers: includingInitContainers,
+					ExcludeOwnerKinds:       excludeKinds,
+					MinPodLifetimeSeconds:   minAgeSeconds,
+				},
+				NodeFit: nodeFit,
+			},
+		}
+	}
+
+	tests := []struct {
+		description             string
+		nodes                   []*v1.Node
+		strategy                api.DeschedulerStrategy
+		expectedEvictedPodCount int
+		pods                    []v1.Pod
+	}{
+		{
+			description:             "0 failures, 0 evictions",
+			strategy:                createStrategy(true, false, nil, nil, nil, false),
+			nodes:                   []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 0,
+			pods:                    []v1.Pod{}, // no pods come back with field selector phase=Failed
+		},
+		{
+			description: "1 container terminated with reason NodeAffinity, 1 eviction",
+			strategy:    createStrategy(true, false, nil, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}),
+			},
+		},
+		{
+			description: "1 init container terminated with reason NodeAffinity, 1 eviction",
+			strategy:    createStrategy(true, true, nil, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}, nil),
+			},
+		},
+		{
+			description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
+			strategy:    createStrategy(true, true, nil, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
+				}, nil),
+			},
+		},
+		{
+			description: "2 init containers terminated with reason CreateContainerConfigError, 2 nodes, 2 evictions",
+			strategy:    createStrategy(true, true, nil, nil, nil, false),
+			nodes: []*v1.Node{
+				test.BuildTestNode("node1", 2000, 3000, 10, nil),
+				test.BuildTestNode("node2", 2000, 3000, 10, nil),
+			},
+			expectedEvictedPodCount: 2,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
+				}, nil),
+				buildTestPod("p2", "node2", &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
+				}, nil),
+			},
+		},
+		{
+			description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
+			strategy:    createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
+				}),
+			},
+		},
+		{
+			description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
+			strategy:    createStrategy(true, false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
+				}),
+			},
+		},
+		{
+			description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
+			strategy:    createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 0,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}),
+			},
+		},
+		{
+			description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
+			strategy:    createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 0,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
+				}, nil),
+			},
+		},
+		{
+			description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
+			strategy:    createStrategy(true, false, nil, nil, &OneHourInSeconds, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 0,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}),
+			},
+		},
+		{
+			description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
+			strategy:    createStrategy(true, false, nil, nil, nil, true),
+			nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) {
+				node.Spec.Unschedulable = true
+			})},
+			expectedEvictedPodCount: 0,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", nil, &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}),
+			},
+		},
+		{
+			description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
+			strategy:    createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 0,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}, nil),
+			},
+		},
+		{
+			description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
+			strategy:    createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
+			nodes:       []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
+			expectedEvictedPodCount: 1,
+			pods: []v1.Pod{
+				buildTestPod("p1", "node1", &v1.ContainerState{
+					Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
+				}, nil),
+			},
+		},
+	}
+	for _, tc := range tests {
+		fakeClient := &fake.Clientset{}
+		fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
+			return true, &v1.PodList{Items: tc.pods}, nil
+		})
+
+		podEvictor := evictions.NewPodEvictor(
+			fakeClient,
+			policyv1.SchemeGroupVersion.String(),
+			false,
+			100,
+			tc.nodes,
+			false,
+			false,
+			false,
+		)
+
+		RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor)
+		actualEvictedPodCount := podEvictor.TotalEvicted()
+		if actualEvictedPodCount != tc.expectedEvictedPodCount {
+			t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
+		}
+	}
+}
+
+func TestValidRemoveFailedPodsParams(t *testing.T) {
+	ctx := context.Background()
+	fakeClient := &fake.Clientset{}
+	testCases := []struct {
+		name   string
+		params *api.StrategyParameters
+	}{
+		{name: "validate nil params", params: nil},
+		{name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
+			Reasons: []string{"CreateContainerConfigError"},
+		}}},
+		{name: "validate includingInitContainers params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
+			IncludingInitContainers: true,
+		}}},
+		{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
+			ExcludeOwnerKinds: []string{"Job"},
+		}}},
+		{name: "validate minPodLifetimeSeconds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
+			MinPodLifetimeSeconds: &OneHourInSeconds,
+		}}},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			params, err := validateAndParseRemoveFailedPodsParams(ctx, fakeClient, tc.params)
+			if err != nil {
+				t.Errorf("strategy params should be valid but got err: %v", err.Error())
+			}
+			if params == nil {
+				t.Errorf("strategy params should return a ValidatedFailedPodsStrategyParams but got nil")
+			}
+		})
+	}
+}
+
+func buildTestPod(podName, nodeName string, initContainerState, containerState *v1.ContainerState) v1.Pod {
+	pod := test.BuildTestPod(podName, 1, 1, nodeName, func(p *v1.Pod) {
+		ps := v1.PodStatus{}
+
+		if initContainerState != nil {
+			ps.InitContainerStatuses = []v1.ContainerStatus{{State: *initContainerState}}
+			ps.Phase = v1.PodFailed
+		}
+
+		if containerState != nil {
+			ps.ContainerStatuses = []v1.ContainerStatus{{State: *containerState}}
+			ps.Phase = v1.PodFailed
+		}
+
+		p.Status = ps
+	})
+	pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
+	pod.ObjectMeta.SetCreationTimestamp(metav1.Now())
+	return *pod
+}
diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go
index 36c857a6b..8dda686d2 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint.go
@@ -27,12 +27,13 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/apimachinery/pkg/util/sets"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
+
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
+	"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
 	"sigs.k8s.io/descheduler/pkg/utils"
 )
 
@@ -47,53 +48,6 @@ type topology struct {
 	pods []*v1.Pod
 }
 
-// topologySpreadStrategyParams contains validated strategy parameters
-type topologySpreadStrategyParams struct {
-	thresholdPriority  int32
-	includedNamespaces sets.String
-	excludedNamespaces sets.String
-	labelSelector      labels.Selector
-	nodeFit            bool
-}
-
-// validateAndParseTopologySpreadParams will validate parameters to ensure that they do not contain invalid values.
-func validateAndParseTopologySpreadParams(ctx context.Context, client clientset.Interface, params *api.StrategyParameters) (*topologySpreadStrategyParams, error) {
-	var includedNamespaces, excludedNamespaces sets.String
-	if params == nil {
-		return &topologySpreadStrategyParams{includedNamespaces: includedNamespaces, excludedNamespaces: excludedNamespaces}, nil
-	}
-	// At most one of include/exclude can be set
-	if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
-		return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set")
-	}
-	if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
-		return nil, fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
-	}
-	thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, params)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get threshold priority from strategy's params: %+v", err)
-	}
-	if params.Namespaces != nil {
-		includedNamespaces = sets.NewString(params.Namespaces.Include...)
-		excludedNamespaces = sets.NewString(params.Namespaces.Exclude...)
-	}
-	var selector labels.Selector
-	if params.LabelSelector != nil {
-		selector, err = metav1.LabelSelectorAsSelector(params.LabelSelector)
-		if err != nil {
-			return nil, fmt.Errorf("failed to get label selectors from strategy's params: %+v", err)
-		}
-	}
-
-	return &topologySpreadStrategyParams{
-		thresholdPriority:  thresholdPriority,
-		includedNamespaces: includedNamespaces,
-		excludedNamespaces: excludedNamespaces,
-		labelSelector:      selector,
-		nodeFit:            params.NodeFit,
-	}, nil
-}
-
 func RemovePodsViolatingTopologySpreadConstraint(
 	ctx context.Context,
 	client clientset.Interface,
@@ -101,16 +55,16 @@ func RemovePodsViolatingTopologySpreadConstraint(
 	nodes []*v1.Node,
 	podEvictor *evictions.PodEvictor,
 ) {
-	strategyParams, err := validateAndParseTopologySpreadParams(ctx, client, strategy.Params)
+	strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, strategy.Params)
 	if err != nil {
 		klog.ErrorS(err, "Invalid RemovePodsViolatingTopologySpreadConstraint parameters")
 		return
 	}
 
 	evictable := podEvictor.Evictable(
-		evictions.WithPriorityThreshold(strategyParams.thresholdPriority),
-		evictions.WithNodeFit(strategyParams.nodeFit),
-		evictions.WithLabelSelector(strategyParams.labelSelector),
+		evictions.WithPriorityThreshold(strategyParams.ThresholdPriority),
+		evictions.WithNodeFit(strategyParams.NodeFit),
+		evictions.WithLabelSelector(strategyParams.LabelSelector),
 	)
 
 	nodeMap := make(map[string]*v1.Node, len(nodes))
@@ -140,8 +94,8 @@ func RemovePodsViolatingTopologySpreadConstraint(
 	podsForEviction := make(map[*v1.Pod]struct{})
 	// 1. for each namespace...
 	for _, namespace := range namespaces.Items {
-		if (len(strategyParams.includedNamespaces) > 0 && !strategyParams.includedNamespaces.Has(namespace.Name)) ||
-			(len(strategyParams.excludedNamespaces) > 0 && strategyParams.excludedNamespaces.Has(namespace.Name)) {
+		if (len(strategyParams.IncludedNamespaces) > 0 && !strategyParams.IncludedNamespaces.Has(namespace.Name)) ||
+			(len(strategyParams.ExcludedNamespaces) > 0 && strategyParams.ExcludedNamespaces.Has(namespace.Name)) {
 			continue
 		}
 		namespacePods, err := client.CoreV1().Pods(namespace.Name).List(ctx, metav1.ListOptions{})
diff --git a/pkg/descheduler/strategies/validation/strategyparams.go b/pkg/descheduler/strategies/validation/strategyparams.go
new file mode 100644
index 000000000..cd6d4570a
--- /dev/null
+++ b/pkg/descheduler/strategies/validation/strategyparams.go
@@ -0,0 +1,69 @@
+package validation
+
+import (
+	"context"
+	"fmt"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/util/sets"
+	clientset "k8s.io/client-go/kubernetes"
+
+	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/utils"
+)
+
+// ValidatedStrategyParams contains validated common strategy parameters
+type ValidatedStrategyParams struct {
+	ThresholdPriority  int32
+	IncludedNamespaces sets.String
+	ExcludedNamespaces sets.String
+	LabelSelector      labels.Selector
+	NodeFit            bool
+}
+
+func ValidateAndParseStrategyParams(
+	ctx context.Context,
+	client clientset.Interface,
+	params *api.StrategyParameters,
+) (*ValidatedStrategyParams, error) {
+	var includedNamespaces, excludedNamespaces sets.String
+	if params == nil {
+		return &ValidatedStrategyParams{
+			IncludedNamespaces: includedNamespaces,
+			ExcludedNamespaces: excludedNamespaces,
+		}, nil
+	}
+
+	// At most one of include/exclude can be set
+	if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
+		return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set")
fmt.Errorf("only one of Include/Exclude namespaces can be set") + } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return nil, fmt.Errorf("only one of ThresholdPriority and thresholdPriorityClassName can be set") + } + + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, params) + if err != nil { + return nil, fmt.Errorf("failed to get threshold priority from strategy's params: %+v", err) + } + if params.Namespaces != nil { + includedNamespaces = sets.NewString(params.Namespaces.Include...) + excludedNamespaces = sets.NewString(params.Namespaces.Exclude...) + } + var selector labels.Selector + if params.LabelSelector != nil { + selector, err = metav1.LabelSelectorAsSelector(params.LabelSelector) + if err != nil { + return nil, fmt.Errorf("failed to get label selectors from strategy's params: %+v", err) + } + } + + return &ValidatedStrategyParams{ + ThresholdPriority: thresholdPriority, + IncludedNamespaces: includedNamespaces, + ExcludedNamespaces: excludedNamespaces, + LabelSelector: selector, + NodeFit: params.NodeFit, + }, nil +} diff --git a/pkg/descheduler/strategies/validation/strategyparams_test.go b/pkg/descheduler/strategies/validation/strategyparams_test.go new file mode 100644 index 000000000..54fd0e2ac --- /dev/null +++ b/pkg/descheduler/strategies/validation/strategyparams_test.go @@ -0,0 +1,79 @@ +package validation + +import ( + "context" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + + "sigs.k8s.io/descheduler/pkg/api" +) + +var ( + thresholdPriority int32 = 1000 +) + +func TestValidStrategyParams(t *testing.T) { + ctx := context.Background() + fakeClient := &fake.Clientset{} + testCases := []struct { + name string + params *api.StrategyParameters + }{ + {name: "validate nil params", params: nil}, + {name: "validate empty params", params: &api.StrategyParameters{}}, + {name: "validate params with NodeFit", params: &api.StrategyParameters{NodeFit: true}}, + {name: "validate params with ThresholdPriority", params: &api.StrategyParameters{ThresholdPriority: &thresholdPriority}}, + {name: "validate params with priorityClassName", params: &api.StrategyParameters{ThresholdPriorityClassName: "high-priority"}}, + {name: "validate params with excluded namespace", params: &api.StrategyParameters{Namespaces: &api.Namespaces{Exclude: []string{"excluded-ns"}}}}, + {name: "validate params with included namespace", params: &api.StrategyParameters{Namespaces: &api.Namespaces{Include: []string{"include-ns"}}}}, + {name: "validate params with empty label selector", params: &api.StrategyParameters{LabelSelector: &metav1.LabelSelector{}}}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + params, err := ValidateAndParseStrategyParams(ctx, fakeClient, tc.params) + if err != nil { + t.Errorf("strategy params should be valid but got err: %v", err.Error()) + } + if params == nil { + t.Errorf("strategy params should return a strategyParams but got nil") + } + }) + } +} + +func TestInvalidStrategyParams(t *testing.T) { + ctx := context.Background() + fakeClient := &fake.Clientset{} + testCases := []struct { + name string + params *api.StrategyParameters + }{ + { + name: "invalid params with both included and excluded namespaces nil params", + params: &api.StrategyParameters{Namespaces: &api.Namespaces{Include: []string{"include-ns"}, Exclude: []string{"exclude-ns"}}}, + }, + { + name: "invalid params with both threshold priority and priority 
class name", + params: &api.StrategyParameters{ThresholdPriorityClassName: "high-priority", ThresholdPriority: &thresholdPriority}, + }, + { + name: "invalid params with bad label selector", + params: &api.StrategyParameters{LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"": "missing-label"}}}, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + params, err := ValidateAndParseStrategyParams(ctx, fakeClient, tc.params) + if err == nil { + t.Errorf("strategy params should be invalid but did not get err") + } + if params != nil { + t.Errorf("strategy params should return a nil strategyParams but got %v", params) + } + }) + } +}