[nodeutilization]: prometheus usage client with prometheus metrics

2026-01-26 21:31:18 +01:00 · 2024-11-07 16:32:06 +01:00
parent aed345994f
commit e283c31030
26 changed files with 1541 additions and 85 deletions
--- a/pkg/framework/plugins/nodeutilization/usageclients.go
+++ b/pkg/framework/plugins/nodeutilization/usageclients.go
@@ -19,7 +19,11 @@ package nodeutilization
 import (
 	"context"
 	"fmt"
+	"time"

+	promapi "github.com/prometheus/client_golang/api"
+	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
+	"github.com/prometheus/common/model"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -33,11 +37,33 @@ import (
 	"sigs.k8s.io/descheduler/pkg/utils"
 )

+type UsageClientType int
+
+const (
+	requestedUsageClientType UsageClientType = iota
+	actualUsageClientType
+	prometheusUsageClientType
+)
+
+type notSupportedError struct {
+	usageClientType UsageClientType
+}
+
+func (e notSupportedError) Error() string {
+	return "maximum number of evicted pods per node reached"
+}
+
+func newNotSupportedError(usageClientType UsageClientType) *notSupportedError {
+	return &notSupportedError{
+		usageClientType: usageClientType,
+	}
+}
+
 type usageClient interface {
 	// Both low/high node utilization plugins are expected to invoke sync right
 	// after Balance method is invoked. There's no cache invalidation so each
 	// Balance is expected to get the latest data by invoking sync.
-	sync(nodes []*v1.Node) error
+	sync(ctx context.Context, nodes []*v1.Node) error
 	nodeUtilization(node string) api.ReferencedResourceList
 	pods(node string) []*v1.Pod
 	podUsage(pod *v1.Pod) (api.ReferencedResourceList, error)
@@ -79,7 +105,7 @@ func (s *requestedUsageClient) podUsage(pod *v1.Pod) (api.ReferencedResourceList
 	return usage, nil
 }

-func (s *requestedUsageClient) sync(nodes []*v1.Node) error {
+func (s *requestedUsageClient) sync(ctx context.Context, nodes []*v1.Node) error {
 	s._nodeUtilization = make(map[string]api.ReferencedResourceList)
 	s._pods = make(map[string][]*v1.Pod)

@@ -165,7 +191,7 @@ func (client *actualUsageClient) podUsage(pod *v1.Pod) (api.ReferencedResourceLi
 	return totalUsage, nil
 }

-func (client *actualUsageClient) sync(nodes []*v1.Node) error {
+func (client *actualUsageClient) sync(ctx context.Context, nodes []*v1.Node) error {
 	client._nodeUtilization = make(map[string]api.ReferencedResourceList)
 	client._pods = make(map[string][]*v1.Pod)

@@ -200,3 +226,95 @@ func (client *actualUsageClient) sync(nodes []*v1.Node) error {

 	return nil
 }
+
+type prometheusUsageClient struct {
+	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
+	promClient            promapi.Client
+	promQuery             string
+
+	_pods            map[string][]*v1.Pod
+	_nodeUtilization map[string]map[v1.ResourceName]*resource.Quantity
+}
+
+var _ usageClient = &actualUsageClient{}
+
+func newPrometheusUsageClient(
+	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
+	promClient promapi.Client,
+	promQuery string,
+) *prometheusUsageClient {
+	return &prometheusUsageClient{
+		getPodsAssignedToNode: getPodsAssignedToNode,
+		promClient:            promClient,
+		promQuery:             promQuery,
+	}
+}
+
+func (client *prometheusUsageClient) nodeUtilization(node string) map[v1.ResourceName]*resource.Quantity {
+	return client._nodeUtilization[node]
+}
+
+func (client *prometheusUsageClient) pods(node string) []*v1.Pod {
+	return client._pods[node]
+}
+
+func (client *prometheusUsageClient) podUsage(pod *v1.Pod) (map[v1.ResourceName]*resource.Quantity, error) {
+	return nil, newNotSupportedError(prometheusUsageClientType)
+}
+
+func NodeUsageFromPrometheusMetrics(ctx context.Context, promClient promapi.Client, promQuery string) (map[string]map[v1.ResourceName]*resource.Quantity, error) {
+	results, warnings, err := promv1.NewAPI(promClient).Query(ctx, promQuery, time.Now())
+	if err != nil {
+		return nil, fmt.Errorf("unable to capture prometheus metrics: %v", err)
+	}
+	if len(warnings) > 0 {
+		klog.Infof("prometheus metrics warnings: %v", warnings)
+	}
+
+	if results.Type() != model.ValVector {
+		return nil, fmt.Errorf("expected query results to be of type %q, got %q instead", model.ValVector, results.Type())
+	}
+
+	nodeUsages := make(map[string]map[v1.ResourceName]*resource.Quantity)
+	for _, sample := range results.(model.Vector) {
+		nodeName, exists := sample.Metric["instance"]
+		if !exists {
+			return nil, fmt.Errorf("The collected metrics sample is missing 'instance' key")
+		}
+		if sample.Value < 0 || sample.Value > 1 {
+			return nil, fmt.Errorf("The collected metrics sample for %q has value %v outside of <0; 1> interval", string(nodeName), sample.Value)
+		}
+		nodeUsages[string(nodeName)] = map[v1.ResourceName]*resource.Quantity{
+			MetricResource: resource.NewQuantity(int64(sample.Value*100), resource.DecimalSI),
+		}
+	}
+
+	return nodeUsages, nil
+}
+
+func (client *prometheusUsageClient) sync(ctx context.Context, nodes []*v1.Node) error {
+	client._nodeUtilization = make(map[string]map[v1.ResourceName]*resource.Quantity)
+	client._pods = make(map[string][]*v1.Pod)
+
+	nodeUsages, err := NodeUsageFromPrometheusMetrics(ctx, client.promClient, client.promQuery)
+	if err != nil {
+		return err
+	}
+
+	for _, node := range nodes {
+		if _, exists := nodeUsages[node.Name]; !exists {
+			return fmt.Errorf("unable to find metric entry for %v", node.Name)
+		}
+		pods, err := podutil.ListPodsOnANode(node.Name, client.getPodsAssignedToNode, nil)
+		if err != nil {
+			klog.V(2).InfoS("Node will not be processed, error accessing its pods", "node", klog.KObj(node), "err", err)
+			return fmt.Errorf("error accessing %q node's pods: %v", node.Name, err)
+		}
+
+		// store the snapshot of pods from the same (or the closest) node utilization computation
+		client._pods[node.Name] = pods
+		client._nodeUtilization[node.Name] = nodeUsages[node.Name]
+	}
+
+	return nil
+}