Mirror of https://github.com/kubernetes-sigs/descheduler.git

feature: use contextual logging for plugins

Signed-off-by: googs1025 <googs1025@gmail.com>
googs1025
2025-03-24 20:04:51 +08:00
parent 9f918371a2
commit 33894afe2b
15 changed files with 140 additions and 88 deletions
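
The change applies one pattern throughout: each plugin derives a named logger from the constructor's context, stores it on the plugin struct, and re-attaches it to the request context at every extension point. A minimal, self-contained sketch of that pattern (the demoPlugin type and all names in it are illustrative, not part of this commit):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// demoPlugin stands in for HighNodeUtilization/LowNodeUtilization: the logger
// is derived once at construction time and stored on the struct.
type demoPlugin struct {
	logger klog.Logger
}

func newDemoPlugin(ctx context.Context) *demoPlugin {
	// Name the logger after the plugin so every line it emits carries
	// plugin=Demo without repeating the key at each call site.
	return &demoPlugin{
		logger: klog.FromContext(ctx).WithValues("plugin", "Demo"),
	}
}

func (d *demoPlugin) Balance(ctx context.Context) {
	// Re-attach the stored logger to the incoming context and tag the
	// extension point, mirroring the Balance methods in this diff.
	logger := klog.FromContext(klog.NewContext(ctx, d.logger)).
		WithValues("ExtensionPoint", "Balance")
	logger.Info("balancing")
}

func main() {
	ctx := context.Background()
	newDemoPlugin(ctx).Balance(ctx)
}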


@@ -44,6 +44,7 @@ var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
// can schedule according to its plugin. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
type HighNodeUtilization struct {
logger klog.Logger
handle frameworktypes.Handle
args *HighNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
@@ -64,6 +65,7 @@ func NewHighNodeUtilization(
genericArgs,
)
}
logger := klog.FromContext(ctx).WithValues("plugin", HighNodeUtilizationPluginName)
// this plugin worries only about thresholds but the nodeplugins
// package was made to take two thresholds into account, one for low
@@ -113,6 +115,7 @@ func NewHighNodeUtilization(
)
return &HighNodeUtilization{
logger: logger,
handle: handle,
args: args,
resourceNames: resourceNames,
@@ -135,6 +138,8 @@ func (h *HighNodeUtilization) Name() string {
// utilized nodes. The goal here is to concentrate pods in fewer nodes so that
// fewer nodes are used.
func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
logger := klog.FromContext(klog.NewContext(ctx, h.logger)).WithValues("ExtensionPoint", frameworktypes.BalanceExtensionPoint)
if err := h.usageClient.sync(ctx, nodes); err != nil {
return &frameworktypes.Status{
Err: fmt.Errorf("error getting node usage: %v", err),
@@ -165,7 +170,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
// schedulable nodes.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS(
logger.V(2).Info(
"Node is unschedulable",
"node", klog.KObj(nodesMap[nodeName]),
)
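
Note that klog.KObj carries over unchanged: it produces a compact namespace/name reference for the value, regardless of whether the sink is the global klog logger or a contextual one. A standalone sketch (the node object is fabricated):

package main

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker-1"}}
	logger := klog.Background().WithValues("plugin", "HighNodeUtilization")
	// KObj yields only the object's namespace/name, so call sites move
	// from klog.V(2).InfoS to logger.V(2).Info without further change.
	logger.Info("Node is unschedulable", "node", klog.KObj(node))
}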
@@ -184,7 +189,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
category := []string{"underutilized", "overutilized"}
for i := range nodeGroups {
for nodeName := range nodeGroups[i] {
klog.InfoS(
logger.Info(
"Node has been classified",
"category", category[i],
"node", klog.KObj(nodesMap[nodeName]),
@@ -208,18 +213,18 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]
klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
logger.V(1).Info("Criteria for a node below target utilization", h.criteria...)
logger.V(1).Info("Number of underutilized nodes", "totalNumber", len(lowNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS(
logger.V(1).Info(
"No node is underutilized, nothing to do here, you might tune your thresholds further",
)
return nil
}
if len(lowNodes) <= h.args.NumberOfNodes {
klog.V(1).InfoS(
logger.V(1).Info(
"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
"underutilizedNodes", len(lowNodes),
"numberOfNodes", h.args.NumberOfNodes,
@@ -228,12 +233,12 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
}
if len(lowNodes) == len(nodes) {
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
logger.V(1).Info("All nodes are underutilized, nothing to do here")
return nil
}
if len(schedulableNodes) == 0 {
klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
logger.V(1).Info("No node is available to schedule the pods, nothing to do here")
return nil
}
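
Every call-site change in this file is the same mechanical mapping: klog.V(n).InfoS(msg, keysAndValues...) becomes logger.V(n).Info(msg, keysAndValues...), with the verbosity threshold intact; only the implicit global logger is swapped for the contextual one. A side-by-side sketch (the count is made up):

package main

import "k8s.io/klog/v2"

func main() {
	logger := klog.Background().WithValues(
		"plugin", "HighNodeUtilization",
		"ExtensionPoint", "Balance",
	)

	// Before: free-floating call against the global logger.
	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", 2)

	// After: same verbosity and key/value pairs, but the plugin and
	// extension-point context ride along automatically.
	logger.V(1).Info("Number of underutilized nodes", "totalNumber", 2)
}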


@@ -43,6 +43,7 @@ var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
// nodes. Note that CPU/Memory requests are used to calculate nodes'
// utilization and not the actual resource usage.
type LowNodeUtilization struct {
logger klog.Logger
handle frameworktypes.Handle
args *LowNodeUtilizationArgs
podFilter func(pod *v1.Pod) bool
@@ -66,6 +67,7 @@ func NewLowNodeUtilization(
genericArgs,
)
}
logger := klog.FromContext(ctx).WithValues("plugin", LowNodeUtilizationPluginName)
// resourceNames holds a list of resources for which the user has
// provided thresholds. extendedResourceNames holds those as well
@@ -115,6 +117,7 @@ func NewLowNodeUtilization(
}
return &LowNodeUtilization{
logger: logger,
handle: handle,
args: args,
underCriteria: thresholdsToKeysAndValues(args.Thresholds),
@@ -135,6 +138,8 @@ func (l *LowNodeUtilization) Name() string {
// utilized nodes to under utilized nodes. The goal here is to evenly
// distribute pods across nodes.
func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
logger := klog.FromContext(klog.NewContext(ctx, l.logger)).WithValues("ExtensionPoint", frameworktypes.BalanceExtensionPoint)
if err := l.usageClient.sync(ctx, nodes); err != nil {
return &frameworktypes.Status{
Err: fmt.Errorf("error getting node usage: %v", err),
@@ -182,7 +187,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
// underutilized but aren't schedulable are ignored.
func(nodeName string, usage, threshold api.ResourceThresholds) bool {
if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
klog.V(2).InfoS(
logger.V(2).Info(
"Node is unschedulable, thus not considered as underutilized",
"node", klog.KObj(nodesMap[nodeName]),
)
@@ -207,7 +212,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
for nodeName := range nodeGroups[i] {
classifiedNodes[nodeName] = true
klog.InfoS(
logger.Info(
"Node has been classified",
"category", categories[i],
"node", klog.KObj(nodesMap[nodeName]),
@@ -233,7 +238,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
// log nodes that are appropriately utilized.
for nodeName := range nodesMap {
if !classifiedNodes[nodeName] {
klog.InfoS(
logger.Info(
"Node is appropriately utilized",
"node", klog.KObj(nodesMap[nodeName]),
"usage", nodesUsageMap[nodeName],
@@ -245,20 +250,20 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
lowNodes, highNodes := nodeInfos[0], nodeInfos[1]
// log messages for nodes with low and high utilization
klog.V(1).InfoS("Criteria for a node under utilization", l.underCriteria...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
klog.V(1).InfoS("Criteria for a node above target utilization", l.overCriteria...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(highNodes))
logger.V(1).Info("Criteria for a node under utilization", l.underCriteria...)
logger.V(1).Info("Number of underutilized nodes", "totalNumber", len(lowNodes))
logger.V(1).Info("Criteria for a node above target utilization", l.overCriteria...)
logger.V(1).Info("Number of overutilized nodes", "totalNumber", len(highNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS(
logger.V(1).Info(
"No node is underutilized, nothing to do here, you might tune your thresholds further",
)
return nil
}
if len(lowNodes) <= l.args.NumberOfNodes {
klog.V(1).InfoS(
logger.V(1).Info(
"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
"underutilizedNodes", len(lowNodes),
"numberOfNodes", l.args.NumberOfNodes,
@@ -267,12 +272,12 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
}
if len(lowNodes) == len(nodes) {
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
logger.V(1).Info("All nodes are underutilized, nothing to do here")
return nil
}
if len(highNodes) == 0 {
klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
logger.V(1).Info("All nodes are under target utilization, nothing to do here")
return nil
}
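
The criteria lines work because underCriteria and overCriteria are pre-flattened slices of alternating keys and values, splatted into the call with "...". A sketch of that flattening, with toKeysAndValues as a hypothetical stand-in for the package's thresholdsToKeysAndValues helper:

package main

import "k8s.io/klog/v2"

// toKeysAndValues flattens a map into the alternating key/value slice that
// logr-style Info calls expect (hypothetical stand-in, not the real helper).
func toKeysAndValues(thresholds map[string]int64) []any {
	kv := make([]any, 0, len(thresholds)*2)
	for k, v := range thresholds {
		kv = append(kv, k, v)
	}
	return kv
}

func main() {
	logger := klog.Background().WithValues("plugin", "LowNodeUtilization")
	// The variadic expansion is what lines like
	// logger.V(1).Info("Criteria for a node under utilization", l.underCriteria...)
	// rely on.
	logger.V(1).Info("Criteria for a node under utilization",
		toKeysAndValues(map[string]int64{"cpu": 20, "memory": 20})...)
}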


@@ -176,13 +176,14 @@ func evictPodsFromSourceNodes(
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) {
logger := klog.FromContext(ctx)
available, err := assessAvailableResourceInNodes(destinationNodes, resourceNames)
if err != nil {
klog.ErrorS(err, "unable to assess available resources in nodes")
logger.Error(err, "unable to assess available resources in nodes")
return
}
klog.V(1).InfoS("Total capacity to be moved", usageToKeysAndValues(available)...)
logger.V(1).Info("Total capacity to be moved", usageToKeysAndValues(available)...)
destinationTaints := make(map[string][]v1.Taint, len(destinationNodes))
for _, node := range destinationNodes {
@@ -190,14 +191,14 @@ func evictPodsFromSourceNodes(
}
for _, node := range sourceNodes {
klog.V(3).InfoS(
logger.V(3).Info(
"Evicting pods from node",
"node", klog.KObj(node.node),
"usage", node.usage,
)
nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
klog.V(2).InfoS(
logger.V(2).Info(
"Pods on node",
"node", klog.KObj(node.node),
"allPods", len(node.allPods),
@@ -206,14 +207,14 @@ func evictPodsFromSourceNodes(
)
if len(removablePods) == 0 {
klog.V(1).InfoS(
logger.V(1).Info(
"No removable pods on node, try next node",
"node", klog.KObj(node.node),
)
continue
}
klog.V(1).InfoS(
logger.V(1).Info(
"Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers",
)
@@ -260,6 +261,7 @@ func evictPods(
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) error {
logger := klog.FromContext(ctx)
// preemptive check to see if we should continue evicting pods.
if !continueEviction(nodeInfo, totalAvailableUsage) {
return nil
@@ -274,7 +276,7 @@ func evictPods(
var evictionCounter uint = 0
for _, pod := range inputPods {
if maxNoOfPodsToEvictPerNode != nil && evictionCounter >= *maxNoOfPodsToEvictPerNode {
klog.V(3).InfoS(
logger.V(3).Info(
"Max number of evictions per node per plugin reached",
"limit", *maxNoOfPodsToEvictPerNode,
)
@@ -282,7 +284,7 @@ func evictPods(
}
if !utils.PodToleratesTaints(pod, destinationTaints) {
klog.V(3).InfoS(
logger.V(3).Info(
"Skipping eviction for pod, doesn't tolerate node taint",
"pod", klog.KObj(pod),
)
@@ -297,7 +299,7 @@ func evictPods(
WithoutNamespaces(excludedNamespaces).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "could not build preEvictionFilter with namespace exclusion")
logger.Error(err, "could not build preEvictionFilter with namespace exclusion")
continue
}
@@ -311,9 +313,8 @@ func evictPods(
podUsage, err := usageClient.podUsage(pod)
if err != nil {
if _, ok := err.(*notSupportedError); !ok {
klog.Errorf(
"unable to get pod usage for %v/%v: %v",
pod.Namespace, pod.Name, err,
logger.Error(err,
"unable to get pod usage", "pod", klog.KObj(pod),
)
continue
}
@@ -325,18 +326,18 @@ func evictPods(
case *evictions.EvictionNodeLimitError, *evictions.EvictionTotalLimitError:
return err
default:
klog.Errorf("eviction failed: %v", err)
logger.Error(err, "eviction failed")
continue
}
}
if maxNoOfPodsToEvictPerNode == nil && unconstrainedResourceEviction {
klog.V(3).InfoS("Currently, only a single pod eviction is allowed")
logger.V(3).Info("Currently, only a single pod eviction is allowed")
break
}
evictionCounter++
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
logger.V(3).Info("Evicted pods", "pod", klog.KObj(pod))
if unconstrainedResourceEviction {
continue
}
@@ -345,7 +346,7 @@ func evictPods(
keysAndValues := []any{"node", nodeInfo.node.Name}
keysAndValues = append(keysAndValues, usageToKeysAndValues(nodeInfo.usage)...)
klog.V(3).InfoS("Updated node usage", keysAndValues...)
logger.V(3).Info("Updated node usage", keysAndValues...)
// make sure we should continue evicting pods.
if !continueEviction(nodeInfo, totalAvailableUsage) {

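Helpers such as evictPodsFromSourceNodes and evictPods take no logger parameter; they recover the caller's logger from the context that the Balance methods populated via klog.NewContext. A sketch of both sides of that hand-off (evictDemo and the node name are invented):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// evictDemo recovers the logger from ctx instead of taking it as a parameter,
// the way evictPodsFromSourceNodes and evictPods do in this diff.
func evictDemo(ctx context.Context) {
	logger := klog.FromContext(ctx)
	logger.Info("Evicting pods from node", "node", "worker-1")
}

func main() {
	logger := klog.Background().WithValues(
		"plugin", "HighNodeUtilization",
		"ExtensionPoint", "Balance",
	)
	// The caller stashes its enriched logger in the context...
	ctx := klog.NewContext(context.Background(), logger)
	// ...and the helper picks it up, plugin and extension point included.
	evictDemo(ctx)
}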

@@ -264,12 +264,13 @@ func (client *prometheusUsageClient) podUsage(pod *v1.Pod) (map[v1.ResourceName]
}
func NodeUsageFromPrometheusMetrics(ctx context.Context, promClient promapi.Client, promQuery string) (map[string]map[v1.ResourceName]*resource.Quantity, error) {
logger := klog.FromContext(ctx)
results, warnings, err := promv1.NewAPI(promClient).Query(ctx, promQuery, time.Now())
if err != nil {
return nil, fmt.Errorf("unable to capture prometheus metrics: %v", err)
}
if len(warnings) > 0 {
klog.Infof("prometheus metrics warnings: %v", warnings)
logger.Info("prometheus metrics warnings", "warnings", warnings)
}
if results.Type() != model.ValVector {
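
One caveat when migrating from klog.Infof: logr-style loggers do not interpret printf directives, so the message must stay constant and the data must move into a key/value pair, as in the warnings line above. A minimal sketch (the warning text is invented):

package main

import "k8s.io/klog/v2"

func main() {
	warnings := []string{"metric staleness exceeded"}
	// Printf-style (old): klog.Infof("prometheus metrics warnings: %v", warnings)
	// Structured (new): the message stays constant; the slice becomes a value.
	klog.Background().Info("prometheus metrics warnings", "warnings", warnings)
}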