From 93a014ef5ebe66d4945f5ad181deb5af9c61ccd8 Mon Sep 17 00:00:00 2001
From: Harsha Narayana
Date: Sat, 4 Feb 2023 09:38:28 +0530
Subject: [PATCH] metrics: enable loop duration and strategy duration metrics (#1041)

* metrics: enable loop duration and strategy duration metrics

* metrics: enable loop duration and strategy duration metrics
---
Review note: the loop-duration observation is wrapped in a deferred closure.
A plain `defer h.Observe(time.Since(start).Seconds())` evaluates its argument
when the defer statement executes (i.e. immediately after time.Now()), so the
histogram would record ~0s for every cycle instead of the cycle's duration.

 metrics/metrics.go             | 20 ++++++++++++++
 pkg/descheduler/descheduler.go | 49 +++++++++++++++++++++++-----------
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/metrics/metrics.go b/metrics/metrics.go
index 168074b66..9cb70246c 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -48,9 +48,29 @@ var (
 		},
 	)
 
+	DeschedulerLoopDuration = metrics.NewHistogramVec(
+		&metrics.HistogramOpts{
+			Subsystem:      DeschedulerSubsystem,
+			Name:           "descheduler_loop_duration_seconds",
+			Help:           "Time taken to complete a full descheduling cycle",
+			StabilityLevel: metrics.ALPHA,
+			Buckets:        []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500},
+		}, []string{})
+
+	DeschedulerStrategyDuration = metrics.NewHistogramVec(
+		&metrics.HistogramOpts{
+			Subsystem:      DeschedulerSubsystem,
+			Name:           "descheduler_strategy_duration_seconds",
+			Help:           "Time taken to complete Each strategy of the descheduling operation",
+			StabilityLevel: metrics.ALPHA,
+			Buckets:        []float64{0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100},
+		}, []string{"strategy", "profile"})
+
 	metricsList = []metrics.Registerable{
 		PodsEvicted,
 		buildInfo,
+		DeschedulerLoopDuration,
+		DeschedulerStrategyDuration,
 	}
 )
 
diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go
index a1b72eb09..bac6ec11b 100644
--- a/pkg/descheduler/descheduler.go
+++ b/pkg/descheduler/descheduler.go
@@ -19,6 +19,7 @@ package descheduler
 import (
 	"context"
 	"fmt"
+	"time"
 
 	componentbaseconfig "k8s.io/component-base/config"
 	"k8s.io/klog/v2"
@@ -50,6 +51,16 @@ import (
 	"sigs.k8s.io/descheduler/pkg/utils"
 )
 
+type enabledDeschedulePluginEntry struct {
+	Plugin  framework.DeschedulePlugin
+	Profile string
+}
+
+type enabledBalancePluginEntry struct {
+	Plugin  framework.BalancePlugin
+	Profile string
+}
+
 func Run(ctx context.Context, rs *options.DeschedulerServer) error {
 	metrics.Register()
 
@@ -268,6 +279,8 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 	defer eventBroadcaster.Shutdown()
 
 	wait.NonSlidingUntil(func() {
+		loopStartDuration := time.Now()
+		defer func() { metrics.DeschedulerLoopDuration.With(map[string]string{}).Observe(time.Since(loopStartDuration).Seconds()) }()
 		nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeLister, nodeSelector)
 		if err != nil {
 			klog.V(1).InfoS("Unable to get ready nodes", "err", err)
@@ -323,8 +336,8 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 			eventRecorder,
 		)
 
-		var enabledDeschedulePlugins []framework.DeschedulePlugin
-		var enabledBalancePlugins []framework.BalancePlugin
+		var enabledDeschedulePlugins []enabledDeschedulePluginEntry
+		var enabledBalancePlugins []enabledBalancePluginEntry
 
 		// Build plugins
 		for _, profile := range deschedulerPolicy.Profiles {
@@ -374,7 +387,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 				}
 				if pg != nil {
 					// pg can be of any of each type, or both
-					enabledDeschedulePlugins, enabledBalancePlugins = includeProfilePluginsByType(enabledDeschedulePlugins, enabledBalancePlugins, pg)
+					enabledDeschedulePlugins, enabledBalancePlugins = includeProfilePluginsByType(enabledDeschedulePlugins, enabledBalancePlugins, pg, profile.Name)
 				}
 			}
 		}
@@ -385,10 +398,12 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 			// handle or function which the Evictor has access to. For migration/in-progress framework
 			// work, we are currently passing this via context. To be removed
 			// (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292)
-			childCtx := context.WithValue(ctx, "strategyName", pg.Name())
-			status := pg.Deschedule(childCtx, nodes)
+			strategyStart := time.Now()
+			childCtx := context.WithValue(ctx, "strategyName", pg.Plugin.Name())
+			status := pg.Plugin.Deschedule(childCtx, nodes)
+			metrics.DeschedulerStrategyDuration.With(map[string]string{"strategy": pg.Plugin.Name(), "profile": pg.Profile}).Observe(time.Since(strategyStart).Seconds())
 			if status != nil && status.Err != nil {
-				klog.ErrorS(status.Err, "plugin finished with error", "pluginName", pg.Name())
+				klog.ErrorS(status.Err, "plugin finished with error", "pluginName", pg.Plugin.Name())
 			}
 		}
 
@@ -397,10 +412,12 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 			// handle or function which the Evictor has access to. For migration/in-progress framework
 			// work, we are currently passing this via context. To be removed
 			// (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292)
-			childCtx := context.WithValue(ctx, "strategyName", pg.Name())
-			status := pg.Balance(childCtx, nodes)
+			strategyStart := time.Now()
+			childCtx := context.WithValue(ctx, "strategyName", pg.Plugin.Name())
+			status := pg.Plugin.Balance(childCtx, nodes)
+			metrics.DeschedulerStrategyDuration.With(map[string]string{"strategy": pg.Plugin.Name(), "profile": pg.Profile}).Observe(time.Since(strategyStart).Seconds())
 			if status != nil && status.Err != nil {
-				klog.ErrorS(status.Err, "plugin finished with error", "pluginName", pg.Name())
+				klog.ErrorS(status.Err, "plugin finished with error", "pluginName", pg.Plugin.Name())
 			}
 		}
 
@@ -415,24 +432,24 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
 	return nil
 }
 
-func includeProfilePluginsByType(enabledDeschedulePlugins []framework.DeschedulePlugin, enabledBalancePlugins []framework.BalancePlugin, pg framework.Plugin) ([]framework.DeschedulePlugin, []framework.BalancePlugin) {
-	enabledDeschedulePlugins = includeDeschedule(enabledDeschedulePlugins, pg)
-	enabledBalancePlugins = includeBalance(enabledBalancePlugins, pg)
+func includeProfilePluginsByType(enabledDeschedulePlugins []enabledDeschedulePluginEntry, enabledBalancePlugins []enabledBalancePluginEntry, pg framework.Plugin, profile string) ([]enabledDeschedulePluginEntry, []enabledBalancePluginEntry) {
+	enabledDeschedulePlugins = includeDeschedule(enabledDeschedulePlugins, pg, profile)
+	enabledBalancePlugins = includeBalance(enabledBalancePlugins, pg, profile)
 	return enabledDeschedulePlugins, enabledBalancePlugins
 }
 
-func includeDeschedule(enabledDeschedulePlugins []framework.DeschedulePlugin, pg framework.Plugin) []framework.DeschedulePlugin {
+func includeDeschedule(enabledDeschedulePlugins []enabledDeschedulePluginEntry, pg framework.Plugin, profile string) []enabledDeschedulePluginEntry {
 	_, ok := pg.(framework.DeschedulePlugin)
 	if ok {
-		enabledDeschedulePlugins = append(enabledDeschedulePlugins, pg.(framework.DeschedulePlugin))
+		enabledDeschedulePlugins = append(enabledDeschedulePlugins, enabledDeschedulePluginEntry{Plugin: pg.(framework.DeschedulePlugin), Profile: profile})
 	}
 	return enabledDeschedulePlugins
 }
 
-func includeBalance(enabledBalancePlugins []framework.BalancePlugin, pg framework.Plugin) []framework.BalancePlugin {
+func includeBalance(enabledBalancePlugins []enabledBalancePluginEntry, pg framework.Plugin, profile string) []enabledBalancePluginEntry {
 	_, ok := pg.(framework.BalancePlugin)
 	if ok {
-		enabledBalancePlugins = append(enabledBalancePlugins, pg.(framework.BalancePlugin))
+		enabledBalancePlugins = append(enabledBalancePlugins, enabledBalancePluginEntry{Plugin: pg.(framework.BalancePlugin), Profile: profile})
 	}
 	return enabledBalancePlugins
 }