From e14b86eb8ce2a1b71535b2f9a95cb0156de224ee Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Mon, 10 Mar 2025 22:55:34 +0100 Subject: [PATCH] [nodeutilization]: allow to set a metrics source as a string so it can be later extended for exclusive configuration --- README.md | 30 ++++++++++---- charts/descheduler/templates/clusterrole.yaml | 6 ++- charts/descheduler/values.yaml | 4 +- pkg/api/types.go | 24 +++++++++-- pkg/api/v1alpha2/types.go | 24 +++++++++-- pkg/api/v1alpha2/zz_generated.conversion.go | 40 ++++++++++++++++--- pkg/api/v1alpha2/zz_generated.deepcopy.go | 27 ++++++++++++- pkg/api/zz_generated.deepcopy.go | 27 ++++++++++++- pkg/descheduler/descheduler.go | 6 +-- pkg/descheduler/descheduler_test.go | 10 +++-- pkg/descheduler/policyconfig.go | 14 +++++-- pkg/descheduler/policyconfig_test.go | 22 ++++++++++ .../nodeutilization/lownodeutilization.go | 3 +- .../lownodeutilization_test.go | 9 ++++- .../plugins/nodeutilization/types.go | 12 ++++-- .../nodeutilization/zz_generated.deepcopy.go | 7 +++- test/e2e/e2e_lownodeutilization_test.go | 10 ++--- 17 files changed, 226 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 3e7bd4de6..afb8ce87f 100644 --- a/README.md +++ b/README.md @@ -124,11 +124,22 @@ These are top level keys in the Descheduler Policy that you can use to configure | `maxNoOfPodsToEvictPerNode` | `int` | `nil` | Maximum number of pods evicted from each node (summed through all strategies). | | `maxNoOfPodsToEvictPerNamespace` | `int` | `nil` | Maximum number of pods evicted from each namespace (summed through all strategies). | | `maxNoOfPodsToEvictTotal` | `int` | `nil` | Maximum number of pods evicted per rescheduling cycle (summed through all strategies). | -| `metricsCollector` | `object` | `nil` | Configures collection of metrics for actual resource utilization. | +| `metricsCollector` (deprecated) | `object` | `nil` | Configures collection of metrics for actual resource utilization. | | `metricsCollector.enabled` | `bool` | `false` | Enables Kubernetes [Metrics Server](https://kubernetes-sigs.github.io/metrics-server/) collection. | +| `metricsProviders` | `[]object` | `nil` | Enables various metrics providers like Kubernetes [Metrics Server](https://kubernetes-sigs.github.io/metrics-server/) | | `evictionFailureEventNotification` | `bool` | `false` | Enables eviction failure event notification. | | `gracePeriodSeconds` | `int` | `0` | The duration in seconds before the object should be deleted. The value zero indicates delete immediately. | +The descheduler currently allows to configure a metric collection of Kubernetes Metrics through `metricsProviders` field. +The previous way of setting `metricsCollector` field is deprecated. There is currently one source to configure: +``` +metricsProviders: +- source: KubernetesMetrics +``` +The list can be extended with other metrics providers in the future. +In general, each plugin can consume metrics from a different provider so multiple distinct providers can be configured in parallel. + + ### Evictor Plugin configuration (Default Evictor) The Default Evictor Plugin is used by default for filtering pods before processing them in an strategy plugin, or for applying a PreEvictionFilter of pods before eviction. You can also create your own Evictor Plugin or use the Default one provided by Descheduler. Other uses for the Evictor plugin can be to sort, filter, validate or group pods by different criteria, and that's why this is handled by a plugin and not configured in the top level config. @@ -163,8 +174,9 @@ maxNoOfPodsToEvictPerNode: 5000 # you don't need to set this, unlimited if not s maxNoOfPodsToEvictPerNamespace: 5000 # you don't need to set this, unlimited if not set maxNoOfPodsToEvictTotal: 5000 # you don't need to set this, unlimited if not set gracePeriodSeconds: 60 # you don't need to set this, 0 if not set -metricsCollector: - enabled: true # you don't need to set this, metrics are not collected if not set +# you don't need to set this, Kubernetes metrics are not collected if not set +metricsProviders: +- source: KubernetesMetrics profiles: - name: ProfileName pluginConfig: @@ -288,9 +300,10 @@ A resource consumption above (resp. below) this window is considered as overutil This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands like `kubectl top`) may differ from the calculated consumption, due to these components reporting -actual usage metrics. Metrics-based descheduling can be enabled by setting `metricsUtilization.metricsServer` field. -In order to have the plugin consume the metrics the metric collector needs to be configured as well. -See `metricsCollector` field at [Top Level configuration](#top-level-configuration) for available options. +actual usage metrics. Metrics-based descheduling can be enabled by setting `metricsUtilization.metricsServer` field (deprecated) +or `metricsUtilization.source` field to `KubernetesMetrics`. +In order to have the plugin consume the metrics the metric provider needs to be configured as well. +See `metricsProviders` field at [Top Level configuration](#top-level-configuration) for available options. **Parameters:** @@ -303,7 +316,8 @@ See `metricsCollector` field at [Top Level configuration](#top-level-configurati |`evictionLimits`|object| |`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))| |`metricsUtilization`|object| -|`metricsUtilization.metricsServer`|bool| +|`metricsUtilization.metricsServer` (deprecated)|bool| +|`metricsUtilization.source`|string| **Example:** @@ -325,7 +339,7 @@ profiles: "memory": 50 "pods": 50 metricsUtilization: - metricsServer: true + source: KubernetesMetrics evictionLimits: node: 5 plugins: diff --git a/charts/descheduler/templates/clusterrole.yaml b/charts/descheduler/templates/clusterrole.yaml index 3a1c3d60b..605b28886 100644 --- a/charts/descheduler/templates/clusterrole.yaml +++ b/charts/descheduler/templates/clusterrole.yaml @@ -36,9 +36,13 @@ rules: resourceNames: ["{{ .Values.leaderElection.resourceName | default "descheduler" }}"] verbs: ["get", "patch", "delete"] {{- end }} -{{- if and .Values.deschedulerPolicy .Values.deschedulerPolicy.metricsCollector .Values.deschedulerPolicy.metricsCollector.enabled }} +{{- if and .Values.deschedulerPolicy }} +{{- range .Values.deschedulerPolicy.metricsProviders }} +{{- if and (hasKey . "source") (eq .source "KubernetesMetrics") }} - apiGroups: ["metrics.k8s.io"] resources: ["pods", "nodes"] verbs: ["get", "list"] {{- end }} +{{- end }} +{{- end }} {{- end -}} diff --git a/charts/descheduler/values.yaml b/charts/descheduler/values.yaml index 17466b067..3398da2fc 100644 --- a/charts/descheduler/values.yaml +++ b/charts/descheduler/values.yaml @@ -96,8 +96,8 @@ deschedulerPolicy: # nodeSelector: "key1=value1,key2=value2" # maxNoOfPodsToEvictPerNode: 10 # maxNoOfPodsToEvictPerNamespace: 10 - # metricsCollector: - # enabled: true + # metricsProviders: + # - source: KubernetesMetrics # ignorePvcPods: true # evictLocalStoragePods: true # evictDaemonSetPods: true diff --git a/pkg/api/types.go b/pkg/api/types.go index 53c61b72b..43f14bef7 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -48,7 +48,11 @@ type DeschedulerPolicy struct { EvictionFailureEventNotification *bool // MetricsCollector configures collection of metrics about actual resource utilization - MetricsCollector MetricsCollector + // Deprecated. Use MetricsProviders field instead. + MetricsCollector *MetricsCollector + + // MetricsProviders configure collection of metrics about actual resource utilization from various sources + MetricsProviders []MetricsProvider // GracePeriodSeconds The duration in seconds before the object should be deleted. Value must be non-negative integer. // The value zero indicates delete immediately. If this value is nil, the default grace period for the @@ -105,12 +109,26 @@ type PluginSet struct { Disabled []string } +type MetricsSource string + +const ( + // KubernetesMetrics enables metrics from a Kubernetes metrics server. + // Please see https://kubernetes-sigs.github.io/metrics-server/ for more. + KubernetesMetrics MetricsSource = "KubernetesMetrics" +) + // MetricsCollector configures collection of metrics about actual resource utilization type MetricsCollector struct { - // Enabled metrics collection from kubernetes metrics. - // Later, the collection can be extended to other providers. + // Enabled metrics collection from Kubernetes metrics. + // Deprecated. Use MetricsProvider.Source field instead. Enabled bool } +// MetricsProvider configures collection of metrics about actual resource utilization from a given source +type MetricsProvider struct { + // Source enables metrics from Kubernetes metrics server. + Source MetricsSource +} + // ReferencedResourceList is an adaption of v1.ResourceList with resources as references type ReferencedResourceList = map[v1.ResourceName]*resource.Quantity diff --git a/pkg/api/v1alpha2/types.go b/pkg/api/v1alpha2/types.go index e67b54b53..e72ecf343 100644 --- a/pkg/api/v1alpha2/types.go +++ b/pkg/api/v1alpha2/types.go @@ -46,7 +46,11 @@ type DeschedulerPolicy struct { EvictionFailureEventNotification *bool `json:"evictionFailureEventNotification,omitempty"` // MetricsCollector configures collection of metrics for actual resource utilization - MetricsCollector MetricsCollector `json:"metricsCollector,omitempty"` + // Deprecated. Use MetricsProviders field instead. + MetricsCollector *MetricsCollector `json:"metricsCollector,omitempty"` + + // MetricsProviders configure collection of metrics about actual resource utilization from various sources + MetricsProviders []MetricsProvider `json:"metricsProviders,omitempty"` // GracePeriodSeconds The duration in seconds before the object should be deleted. Value must be non-negative integer. // The value zero indicates delete immediately. If this value is nil, the default grace period for the @@ -80,9 +84,23 @@ type PluginSet struct { Disabled []string `json:"disabled"` } +type MetricsSource string + +const ( + // KubernetesMetrics enables metrics from a Kubernetes metrics server. + // Please see https://kubernetes-sigs.github.io/metrics-server/ for more. + KubernetesMetrics MetricsSource = "KubernetesMetrics" +) + // MetricsCollector configures collection of metrics about actual resource utilization type MetricsCollector struct { - // Enabled metrics collection from kubernetes metrics. - // Later, the collection can be extended to other providers. + // Enabled metrics collection from Kubernetes metrics server. + // Deprecated. Use MetricsProvider.Source field instead. Enabled bool `json:"enabled,omitempty"` } + +// MetricsProvider configures collection of metrics about actual resource utilization from a given source +type MetricsProvider struct { + // Source enables metrics from Kubernetes metrics server. + Source MetricsSource `json:"source,omitempty"` +} diff --git a/pkg/api/v1alpha2/zz_generated.conversion.go b/pkg/api/v1alpha2/zz_generated.conversion.go index 61879b9e8..01e2c6b85 100644 --- a/pkg/api/v1alpha2/zz_generated.conversion.go +++ b/pkg/api/v1alpha2/zz_generated.conversion.go @@ -56,6 +56,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*MetricsProvider)(nil), (*api.MetricsProvider)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha2_MetricsProvider_To_api_MetricsProvider(a.(*MetricsProvider), b.(*api.MetricsProvider), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*api.MetricsProvider)(nil), (*MetricsProvider)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_api_MetricsProvider_To_v1alpha2_MetricsProvider(a.(*api.MetricsProvider), b.(*MetricsProvider), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*api.PluginConfig)(nil), (*PluginConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_api_PluginConfig_To_v1alpha2_PluginConfig(a.(*api.PluginConfig), b.(*PluginConfig), scope) }); err != nil { @@ -116,9 +126,8 @@ func autoConvert_v1alpha2_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Desched out.MaxNoOfPodsToEvictPerNamespace = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNamespace)) out.MaxNoOfPodsToEvictTotal = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictTotal)) out.EvictionFailureEventNotification = (*bool)(unsafe.Pointer(in.EvictionFailureEventNotification)) - if err := Convert_v1alpha2_MetricsCollector_To_api_MetricsCollector(&in.MetricsCollector, &out.MetricsCollector, s); err != nil { - return err - } + out.MetricsCollector = (*api.MetricsCollector)(unsafe.Pointer(in.MetricsCollector)) + out.MetricsProviders = *(*[]api.MetricsProvider)(unsafe.Pointer(&in.MetricsProviders)) out.GracePeriodSeconds = (*int64)(unsafe.Pointer(in.GracePeriodSeconds)) return nil } @@ -140,9 +149,8 @@ func autoConvert_api_DeschedulerPolicy_To_v1alpha2_DeschedulerPolicy(in *api.Des out.MaxNoOfPodsToEvictPerNamespace = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNamespace)) out.MaxNoOfPodsToEvictTotal = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictTotal)) out.EvictionFailureEventNotification = (*bool)(unsafe.Pointer(in.EvictionFailureEventNotification)) - if err := Convert_api_MetricsCollector_To_v1alpha2_MetricsCollector(&in.MetricsCollector, &out.MetricsCollector, s); err != nil { - return err - } + out.MetricsCollector = (*MetricsCollector)(unsafe.Pointer(in.MetricsCollector)) + out.MetricsProviders = *(*[]MetricsProvider)(unsafe.Pointer(&in.MetricsProviders)) out.GracePeriodSeconds = (*int64)(unsafe.Pointer(in.GracePeriodSeconds)) return nil } @@ -215,6 +223,26 @@ func Convert_api_MetricsCollector_To_v1alpha2_MetricsCollector(in *api.MetricsCo return autoConvert_api_MetricsCollector_To_v1alpha2_MetricsCollector(in, out, s) } +func autoConvert_v1alpha2_MetricsProvider_To_api_MetricsProvider(in *MetricsProvider, out *api.MetricsProvider, s conversion.Scope) error { + out.Source = api.MetricsSource(in.Source) + return nil +} + +// Convert_v1alpha2_MetricsProvider_To_api_MetricsProvider is an autogenerated conversion function. +func Convert_v1alpha2_MetricsProvider_To_api_MetricsProvider(in *MetricsProvider, out *api.MetricsProvider, s conversion.Scope) error { + return autoConvert_v1alpha2_MetricsProvider_To_api_MetricsProvider(in, out, s) +} + +func autoConvert_api_MetricsProvider_To_v1alpha2_MetricsProvider(in *api.MetricsProvider, out *MetricsProvider, s conversion.Scope) error { + out.Source = MetricsSource(in.Source) + return nil +} + +// Convert_api_MetricsProvider_To_v1alpha2_MetricsProvider is an autogenerated conversion function. +func Convert_api_MetricsProvider_To_v1alpha2_MetricsProvider(in *api.MetricsProvider, out *MetricsProvider, s conversion.Scope) error { + return autoConvert_api_MetricsProvider_To_v1alpha2_MetricsProvider(in, out, s) +} + func autoConvert_v1alpha2_PluginConfig_To_api_PluginConfig(in *PluginConfig, out *api.PluginConfig, s conversion.Scope) error { out.Name = in.Name if err := runtime.Convert_runtime_RawExtension_To_runtime_Object(&in.Args, &out.Args, s); err != nil { diff --git a/pkg/api/v1alpha2/zz_generated.deepcopy.go b/pkg/api/v1alpha2/zz_generated.deepcopy.go index f6e56be59..ab0b94e16 100644 --- a/pkg/api/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha2/zz_generated.deepcopy.go @@ -61,7 +61,16 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(bool) **out = **in } - out.MetricsCollector = in.MetricsCollector + if in.MetricsCollector != nil { + in, out := &in.MetricsCollector, &out.MetricsCollector + *out = new(MetricsCollector) + **out = **in + } + if in.MetricsProviders != nil { + in, out := &in.MetricsProviders, &out.MetricsProviders + *out = make([]MetricsProvider, len(*in)) + copy(*out, *in) + } if in.GracePeriodSeconds != nil { in, out := &in.GracePeriodSeconds, &out.GracePeriodSeconds *out = new(int64) @@ -128,6 +137,22 @@ func (in *MetricsCollector) DeepCopy() *MetricsCollector { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricsProvider) DeepCopyInto(out *MetricsProvider) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricsProvider. +func (in *MetricsProvider) DeepCopy() *MetricsProvider { + if in == nil { + return nil + } + out := new(MetricsProvider) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PluginConfig) DeepCopyInto(out *PluginConfig) { *out = *in diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index fac17715e..f38b9e124 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -61,7 +61,16 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(bool) **out = **in } - out.MetricsCollector = in.MetricsCollector + if in.MetricsCollector != nil { + in, out := &in.MetricsCollector, &out.MetricsCollector + *out = new(MetricsCollector) + **out = **in + } + if in.MetricsProviders != nil { + in, out := &in.MetricsProviders, &out.MetricsProviders + *out = make([]MetricsProvider, len(*in)) + copy(*out, *in) + } if in.GracePeriodSeconds != nil { in, out := &in.GracePeriodSeconds, &out.GracePeriodSeconds *out = new(int64) @@ -149,6 +158,22 @@ func (in *MetricsCollector) DeepCopy() *MetricsCollector { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricsProvider) DeepCopyInto(out *MetricsProvider) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricsProvider. +func (in *MetricsProvider) DeepCopy() *MetricsProvider { + if in == nil { + return nil + } + out := new(MetricsProvider) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Namespaces) DeepCopyInto(out *Namespaces) { *out = *in diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 046822334..41534d128 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -166,7 +166,7 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu } var metricsCollector *metricscollector.MetricsCollector - if deschedulerPolicy.MetricsCollector.Enabled { + if (deschedulerPolicy.MetricsCollector != nil && deschedulerPolicy.MetricsCollector.Enabled) || (len(deschedulerPolicy.MetricsProviders) > 0 && deschedulerPolicy.MetricsProviders[0].Source == api.KubernetesMetrics) { nodeSelector := labels.Everything() if deschedulerPolicy.NodeSelector != nil { sel, err := labels.Parse(*deschedulerPolicy.NodeSelector) @@ -332,7 +332,7 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error { return err } - if deschedulerPolicy.MetricsCollector.Enabled { + if (deschedulerPolicy.MetricsCollector != nil && deschedulerPolicy.MetricsCollector.Enabled) || (len(deschedulerPolicy.MetricsProviders) > 0 && deschedulerPolicy.MetricsProviders[0].Source == api.KubernetesMetrics) { metricsClient, err := client.CreateMetricsClient(clientConnection, "descheduler") if err != nil { return err @@ -448,7 +448,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer sharedInformerFactory.WaitForCacheSync(ctx.Done()) descheduler.podEvictor.WaitForEventHandlersSync(ctx) - if deschedulerPolicy.MetricsCollector.Enabled { + if (deschedulerPolicy.MetricsCollector != nil && deschedulerPolicy.MetricsCollector.Enabled) || (len(deschedulerPolicy.MetricsProviders) > 0 && deschedulerPolicy.MetricsProviders[0].Source == api.KubernetesMetrics) { go func() { klog.V(2).Infof("Starting metrics collector") descheduler.metricsCollector.Run(ctx) diff --git a/pkg/descheduler/descheduler_test.go b/pkg/descheduler/descheduler_test.go index 1591444b6..bea4ede85 100644 --- a/pkg/descheduler/descheduler_test.go +++ b/pkg/descheduler/descheduler_test.go @@ -136,6 +136,10 @@ func removeDuplicatesPolicy() *api.DeschedulerPolicy { } func lowNodeUtilizationPolicy(thresholds, targetThresholds api.ResourceThresholds, metricsEnabled bool) *api.DeschedulerPolicy { + var metricsSource api.MetricsSource = "" + if metricsEnabled { + metricsSource = api.KubernetesMetrics + } return &api.DeschedulerPolicy{ Profiles: []api.DeschedulerProfile{ { @@ -146,8 +150,8 @@ func lowNodeUtilizationPolicy(thresholds, targetThresholds api.ResourceThreshold Args: &nodeutilization.LowNodeUtilizationArgs{ Thresholds: thresholds, TargetThresholds: targetThresholds, - MetricsUtilization: nodeutilization.MetricsUtilization{ - MetricsServer: metricsEnabled, + MetricsUtilization: &nodeutilization.MetricsUtilization{ + Source: metricsSource, }, }, }, @@ -837,7 +841,7 @@ func TestLoadAwareDescheduling(t *testing.T) { }, true, // enabled metrics utilization ) - policy.MetricsCollector.Enabled = true + policy.MetricsProviders = []api.MetricsProvider{{Source: api.KubernetesMetrics}} ctxCancel, cancel := context.WithCancel(ctx) _, descheduler, _ := initDescheduler( diff --git a/pkg/descheduler/policyconfig.go b/pkg/descheduler/policyconfig.go index d2f281abb..8cdbff66c 100644 --- a/pkg/descheduler/policyconfig.go +++ b/pkg/descheduler/policyconfig.go @@ -137,11 +137,11 @@ func setDefaultEvictor(profile api.DeschedulerProfile, client clientset.Interfac } func validateDeschedulerConfiguration(in api.DeschedulerPolicy, registry pluginregistry.Registry) error { - var errorsInProfiles []error + var errorsInPolicy []error for _, profile := range in.Profiles { for _, pluginConfig := range profile.PluginConfigs { if _, ok := registry[pluginConfig.Name]; !ok { - errorsInProfiles = append(errorsInProfiles, fmt.Errorf("in profile %s: plugin %s in pluginConfig not registered", profile.Name, pluginConfig.Name)) + errorsInPolicy = append(errorsInPolicy, fmt.Errorf("in profile %s: plugin %s in pluginConfig not registered", profile.Name, pluginConfig.Name)) continue } @@ -150,9 +150,15 @@ func validateDeschedulerConfiguration(in api.DeschedulerPolicy, registry pluginr continue } if err := pluginUtilities.PluginArgValidator(pluginConfig.Args); err != nil { - errorsInProfiles = append(errorsInProfiles, fmt.Errorf("in profile %s: %s", profile.Name, err.Error())) + errorsInPolicy = append(errorsInPolicy, fmt.Errorf("in profile %s: %s", profile.Name, err.Error())) } } } - return utilerrors.NewAggregate(errorsInProfiles) + if len(in.MetricsProviders) > 1 { + errorsInPolicy = append(errorsInPolicy, fmt.Errorf("only a single metrics provider can be set, got %v instead", len(in.MetricsProviders))) + } + if len(in.MetricsProviders) > 0 && in.MetricsCollector != nil && in.MetricsCollector.Enabled { + errorsInPolicy = append(errorsInPolicy, fmt.Errorf("it is not allowed to combine metrics provider when metrics collector is enabled")) + } + return utilerrors.NewAggregate(errorsInPolicy) } diff --git a/pkg/descheduler/policyconfig_test.go b/pkg/descheduler/policyconfig_test.go index 1ae417ec2..8199003da 100644 --- a/pkg/descheduler/policyconfig_test.go +++ b/pkg/descheduler/policyconfig_test.go @@ -211,6 +211,28 @@ func TestValidateDeschedulerConfiguration(t *testing.T) { }, result: fmt.Errorf("[in profile RemoveFailedPods: only one of Include/Exclude namespaces can be set, in profile RemovePodsViolatingTopologySpreadConstraint: only one of Include/Exclude namespaces can be set]"), }, + { + description: "Too many metrics providers error", + deschedulerPolicy: api.DeschedulerPolicy{ + MetricsProviders: []api.MetricsProvider{ + {Source: api.KubernetesMetrics}, + {Source: api.KubernetesMetrics}, + }, + }, + result: fmt.Errorf("only a single metrics provider can be set, got 2 instead"), + }, + { + description: "Too many metrics providers error", + deschedulerPolicy: api.DeschedulerPolicy{ + MetricsCollector: &api.MetricsCollector{ + Enabled: true, + }, + MetricsProviders: []api.MetricsProvider{ + {Source: api.KubernetesMetrics}, + }, + }, + result: fmt.Errorf("it is not allowed to combine metrics provider when metrics collector is enabled"), + }, } for _, tc := range testCases { diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go index 7602f455b..0547517c7 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go @@ -89,7 +89,8 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds) var usageClient usageClient - if lowNodeUtilizationArgsArgs.MetricsUtilization.MetricsServer { + // MetricsServer is deprecated, removed once dropped + if lowNodeUtilizationArgsArgs.MetricsUtilization != nil && (lowNodeUtilizationArgsArgs.MetricsUtilization.MetricsServer || lowNodeUtilizationArgsArgs.MetricsUtilization.Source == api.KubernetesMetrics) { if handle.MetricsCollector() == nil { return nil, fmt.Errorf("metrics client not initialized") } diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go index 671344a11..a97022365 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go @@ -1257,14 +1257,19 @@ func TestLowNodeUtilization(t *testing.T) { } handle.MetricsCollectorImpl = collector + var metricsSource api.MetricsSource = "" + if metricsEnabled { + metricsSource = api.KubernetesMetrics + } + plugin, err := NewLowNodeUtilization(&LowNodeUtilizationArgs{ Thresholds: tc.thresholds, TargetThresholds: tc.targetThresholds, UseDeviationThresholds: tc.useDeviationThresholds, EvictionLimits: tc.evictionLimits, EvictableNamespaces: tc.evictableNamespaces, - MetricsUtilization: MetricsUtilization{ - MetricsServer: metricsEnabled, + MetricsUtilization: &MetricsUtilization{ + Source: metricsSource, }, }, handle) diff --git a/pkg/framework/plugins/nodeutilization/types.go b/pkg/framework/plugins/nodeutilization/types.go index e670edab0..23e5a3ebf 100644 --- a/pkg/framework/plugins/nodeutilization/types.go +++ b/pkg/framework/plugins/nodeutilization/types.go @@ -28,7 +28,7 @@ type LowNodeUtilizationArgs struct { Thresholds api.ResourceThresholds `json:"thresholds"` TargetThresholds api.ResourceThresholds `json:"targetThresholds"` NumberOfNodes int `json:"numberOfNodes,omitempty"` - MetricsUtilization MetricsUtilization `json:"metricsUtilization,omitempty"` + MetricsUtilization *MetricsUtilization `json:"metricsUtilization,omitempty"` // Naming this one differently since namespaces are still // considered while considering resources used by pods @@ -45,9 +45,8 @@ type LowNodeUtilizationArgs struct { type HighNodeUtilizationArgs struct { metav1.TypeMeta `json:",inline"` - Thresholds api.ResourceThresholds `json:"thresholds"` - NumberOfNodes int `json:"numberOfNodes,omitempty"` - MetricsUtilization MetricsUtilization `json:"metricsUtilization,omitempty"` + Thresholds api.ResourceThresholds `json:"thresholds"` + NumberOfNodes int `json:"numberOfNodes,omitempty"` // Naming this one differently since namespaces are still // considered while considering resources used by pods @@ -59,5 +58,10 @@ type HighNodeUtilizationArgs struct { type MetricsUtilization struct { // metricsServer enables metrics from a kubernetes metrics server. // Please see https://kubernetes-sigs.github.io/metrics-server/ for more. + // Deprecated. Use MetricsSource instead. MetricsServer bool `json:"metricsServer,omitempty"` + + // source enables the plugin to consume metrics from a metrics source. + // Currently only KubernetesMetrics available. + Source api.MetricsSource `json:"source,omitempty"` } diff --git a/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go b/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go index adbc058e9..36b90743c 100644 --- a/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go +++ b/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go @@ -37,7 +37,6 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) { (*out)[key] = val } } - out.MetricsUtilization = in.MetricsUtilization if in.EvictableNamespaces != nil { in, out := &in.EvictableNamespaces, &out.EvictableNamespaces *out = new(api.Namespaces) @@ -82,7 +81,11 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) { (*out)[key] = val } } - out.MetricsUtilization = in.MetricsUtilization + if in.MetricsUtilization != nil { + in, out := &in.MetricsUtilization, &out.MetricsUtilization + *out = new(MetricsUtilization) + **out = **in + } if in.EvictableNamespaces != nil { in, out := &in.EvictableNamespaces, &out.EvictableNamespaces *out = new(api.Namespaces) diff --git a/test/e2e/e2e_lownodeutilization_test.go b/test/e2e/e2e_lownodeutilization_test.go index 125c5497f..684627a39 100644 --- a/test/e2e/e2e_lownodeutilization_test.go +++ b/test/e2e/e2e_lownodeutilization_test.go @@ -43,7 +43,7 @@ import ( func lowNodeUtilizationPolicy(lowNodeUtilizationArgs *nodeutilization.LowNodeUtilizationArgs, evictorArgs *defaultevictor.DefaultEvictorArgs, metricsCollectorEnabled bool) *apiv1alpha2.DeschedulerPolicy { return &apiv1alpha2.DeschedulerPolicy{ - MetricsCollector: apiv1alpha2.MetricsCollector{ + MetricsCollector: &apiv1alpha2.MetricsCollector{ Enabled: metricsCollectorEnabled, }, Profiles: []apiv1alpha2.DeschedulerProfile{ @@ -147,8 +147,8 @@ func TestLowNodeUtilizationKubernetesMetrics(t *testing.T) { v1.ResourceCPU: 50, v1.ResourcePods: 50, }, - MetricsUtilization: nodeutilization.MetricsUtilization{ - MetricsServer: true, + MetricsUtilization: &nodeutilization.MetricsUtilization{ + Source: api.KubernetesMetrics, }, }, evictorArgs: &defaultevictor.DefaultEvictorArgs{}, @@ -171,8 +171,8 @@ func TestLowNodeUtilizationKubernetesMetrics(t *testing.T) { v1.ResourceCPU: 50, v1.ResourcePods: 50, }, - MetricsUtilization: nodeutilization.MetricsUtilization{ - MetricsServer: true, + MetricsUtilization: &nodeutilization.MetricsUtilization{ + Source: api.KubernetesMetrics, }, }, evictorArgs: &defaultevictor.DefaultEvictorArgs{},