Mirror of https://github.com/kubernetes-sigs/descheduler.git, synced 2026-01-26 21:31:18 +01:00
feature: use contextual logging for plugins
Signed-off-by: googs1025 <googs1025@gmail.com>
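
The diff below moves the nodeutilization plugins from global klog calls (klog.InfoS, klog.V(n).InfoS, klog.ErrorS, klog.Errorf) to contextual logging: each plugin constructor derives a klog.Logger from the context and tags it with the plugin name, and the Balance extension point re-derives it with an "ExtensionPoint" value. A minimal standalone sketch of that pattern follows; the plugin name, function names, and log values here are illustrative, not taken from the descheduler code.

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// newPluginLogger mirrors the constructor change: derive a logger from the
// incoming context and tag it with the plugin name.
func newPluginLogger(ctx context.Context, pluginName string) klog.Logger {
	return klog.FromContext(ctx).WithValues("plugin", pluginName)
}

// balance mirrors the extension-point change: wrap the stored logger back
// into the context, add the extension point, and log through the result.
func balance(ctx context.Context, stored klog.Logger) {
	logger := klog.FromContext(klog.NewContext(ctx, stored)).WithValues("ExtensionPoint", "Balance")
	logger.V(1).Info("Number of underutilized nodes", "totalNumber", 0)
}

func main() {
	ctx := context.Background()
	logger := newPluginLogger(ctx, "ExamplePlugin") // hypothetical plugin name
	balance(ctx, logger)
}
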
@@ -44,6 +44,7 @@ var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}
 // can schedule according to its plugin. Note that CPU/Memory requests are used
 // to calculate nodes' utilization and not the actual resource usage.
 type HighNodeUtilization struct {
+	logger    klog.Logger
 	handle    frameworktypes.Handle
 	args      *HighNodeUtilizationArgs
 	podFilter func(pod *v1.Pod) bool
@@ -64,6 +65,7 @@ func NewHighNodeUtilization(
 			genericArgs,
 		)
 	}
+	logger := klog.FromContext(ctx).WithValues("plugin", HighNodeUtilizationPluginName)
 
 	// this plugins worries only about thresholds but the nodeplugins
 	// package was made to take two thresholds into account, one for low
@@ -113,6 +115,7 @@ func NewHighNodeUtilization(
 	)
 
 	return &HighNodeUtilization{
+		logger:        logger,
 		handle:        handle,
 		args:          args,
 		resourceNames: resourceNames,
@@ -135,6 +138,8 @@ func (h *HighNodeUtilization) Name() string {
 // utilized nodes. The goal here is to concentrate pods in fewer nodes so that
 // less nodes are used.
 func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
+	logger := klog.FromContext(klog.NewContext(ctx, h.logger)).WithValues("ExtensionPoint", frameworktypes.BalanceExtensionPoint)
+
 	if err := h.usageClient.sync(ctx, nodes); err != nil {
 		return &frameworktypes.Status{
 			Err: fmt.Errorf("error getting node usage: %v", err),
@@ -165,7 +170,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 		// schedulable nodes.
 		func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-				klog.V(2).InfoS(
+				logger.V(2).Info(
 					"Node is unschedulable",
 					"node", klog.KObj(nodesMap[nodeName]),
 				)
@@ -184,7 +189,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 	category := []string{"underutilized", "overutilized"}
 	for i := range nodeGroups {
 		for nodeName := range nodeGroups[i] {
-			klog.InfoS(
+			logger.Info(
 				"Node has been classified",
 				"category", category[i],
 				"node", klog.KObj(nodesMap[nodeName]),
@@ -208,18 +213,18 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 
 	lowNodes, schedulableNodes := nodeInfos[0], nodeInfos[1]
 
-	klog.V(1).InfoS("Criteria for a node below target utilization", h.criteria...)
-	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
+	logger.V(1).Info("Criteria for a node below target utilization", h.criteria...)
+	logger.V(1).Info("Number of underutilized nodes", "totalNumber", len(lowNodes))
 
 	if len(lowNodes) == 0 {
-		klog.V(1).InfoS(
+		logger.V(1).Info(
 			"No node is underutilized, nothing to do here, you might tune your thresholds further",
 		)
 		return nil
 	}
 
 	if len(lowNodes) <= h.args.NumberOfNodes {
-		klog.V(1).InfoS(
+		logger.V(1).Info(
 			"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
 			"underutilizedNodes", len(lowNodes),
 			"numberOfNodes", h.args.NumberOfNodes,
@@ -228,12 +233,12 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
 	}
 
 	if len(lowNodes) == len(nodes) {
-		klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
+		logger.V(1).Info("All nodes are underutilized, nothing to do here")
 		return nil
 	}
 
 	if len(schedulableNodes) == 0 {
-		klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
+		logger.V(1).Info("No node is available to schedule the pods, nothing to do here")
 		return nil
 	}
 
@@ -43,6 +43,7 @@ var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
 // nodes. Note that CPU/Memory requests are used to calculate nodes'
 // utilization and not the actual resource usage.
 type LowNodeUtilization struct {
+	logger    klog.Logger
 	handle    frameworktypes.Handle
 	args      *LowNodeUtilizationArgs
 	podFilter func(pod *v1.Pod) bool
@@ -66,6 +67,7 @@ func NewLowNodeUtilization(
 			genericArgs,
 		)
 	}
+	logger := klog.FromContext(ctx).WithValues("plugin", LowNodeUtilizationPluginName)
 
 	// resourceNames holds a list of resources for which the user has
 	// provided thresholds for. extendedResourceNames holds those as well
@@ -115,6 +117,7 @@ func NewLowNodeUtilization(
 	}
 
 	return &LowNodeUtilization{
+		logger:        logger,
 		handle:        handle,
 		args:          args,
 		underCriteria: thresholdsToKeysAndValues(args.Thresholds),
@@ -135,6 +138,8 @@ func (l *LowNodeUtilization) Name() string {
 // utilized nodes to under utilized nodes. The goal here is to evenly
 // distribute pods across nodes.
 func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
+	logger := klog.FromContext(klog.NewContext(ctx, l.logger)).WithValues("ExtensionPoint", frameworktypes.BalanceExtensionPoint)
+
 	if err := l.usageClient.sync(ctx, nodes); err != nil {
 		return &frameworktypes.Status{
 			Err: fmt.Errorf("error getting node usage: %v", err),
@@ -182,7 +187,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 		// underutilized but aren't schedulable are ignored.
 		func(nodeName string, usage, threshold api.ResourceThresholds) bool {
 			if nodeutil.IsNodeUnschedulable(nodesMap[nodeName]) {
-				klog.V(2).InfoS(
+				logger.V(2).Info(
 					"Node is unschedulable, thus not considered as underutilized",
 					"node", klog.KObj(nodesMap[nodeName]),
 				)
@@ -207,7 +212,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 		for nodeName := range nodeGroups[i] {
 			classifiedNodes[nodeName] = true
 
-			klog.InfoS(
+			logger.Info(
 				"Node has been classified",
 				"category", categories[i],
 				"node", klog.KObj(nodesMap[nodeName]),
@@ -233,7 +238,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	// log nodes that are appropriately utilized.
 	for nodeName := range nodesMap {
 		if !classifiedNodes[nodeName] {
-			klog.InfoS(
+			logger.Info(
 				"Node is appropriately utilized",
 				"node", klog.KObj(nodesMap[nodeName]),
 				"usage", nodesUsageMap[nodeName],
@@ -245,20 +250,20 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	lowNodes, highNodes := nodeInfos[0], nodeInfos[1]
 
 	// log messages for nodes with low and high utilization
-	klog.V(1).InfoS("Criteria for a node under utilization", l.underCriteria...)
-	klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
-	klog.V(1).InfoS("Criteria for a node above target utilization", l.overCriteria...)
-	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(highNodes))
+	logger.V(1).Info("Criteria for a node under utilization", l.underCriteria...)
+	logger.V(1).Info("Number of underutilized nodes", "totalNumber", len(lowNodes))
+	logger.V(1).Info("Criteria for a node above target utilization", l.overCriteria...)
+	logger.V(1).Info("Number of overutilized nodes", "totalNumber", len(highNodes))
 
 	if len(lowNodes) == 0 {
-		klog.V(1).InfoS(
+		logger.V(1).Info(
 			"No node is underutilized, nothing to do here, you might tune your thresholds further",
 		)
 		return nil
 	}
 
 	if len(lowNodes) <= l.args.NumberOfNodes {
-		klog.V(1).InfoS(
+		logger.V(1).Info(
 			"Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here",
 			"underutilizedNodes", len(lowNodes),
 			"numberOfNodes", l.args.NumberOfNodes,
@@ -267,12 +272,12 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
 	}
 
 	if len(lowNodes) == len(nodes) {
-		klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
+		logger.V(1).Info("All nodes are underutilized, nothing to do here")
 		return nil
 	}
 
 	if len(highNodes) == 0 {
-		klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
+		logger.V(1).Info("All nodes are under target utilization, nothing to do here")
 		return nil
 	}
 
@@ -176,13 +176,14 @@ func evictPodsFromSourceNodes(
 	usageClient usageClient,
 	maxNoOfPodsToEvictPerNode *uint,
 ) {
+	logger := klog.FromContext(ctx)
 	available, err := assessAvailableResourceInNodes(destinationNodes, resourceNames)
 	if err != nil {
-		klog.ErrorS(err, "unable to assess available resources in nodes")
+		logger.Error(err, "unable to assess available resources in nodes")
 		return
 	}
 
-	klog.V(1).InfoS("Total capacity to be moved", usageToKeysAndValues(available)...)
+	logger.V(1).Info("Total capacity to be moved", usageToKeysAndValues(available)...)
 
 	destinationTaints := make(map[string][]v1.Taint, len(destinationNodes))
 	for _, node := range destinationNodes {
@@ -190,14 +191,14 @@ func evictPodsFromSourceNodes(
 	}
 
 	for _, node := range sourceNodes {
-		klog.V(3).InfoS(
+		logger.V(3).Info(
 			"Evicting pods from node",
 			"node", klog.KObj(node.node),
 			"usage", node.usage,
 		)
 
 		nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
-		klog.V(2).InfoS(
+		logger.V(2).Info(
 			"Pods on node",
 			"node", klog.KObj(node.node),
 			"allPods", len(node.allPods),
@@ -206,14 +207,14 @@ func evictPodsFromSourceNodes(
 		)
 
 		if len(removablePods) == 0 {
-			klog.V(1).InfoS(
+			logger.V(1).Info(
 				"No removable pods on node, try next node",
 				"node", klog.KObj(node.node),
 			)
 			continue
 		}
 
-		klog.V(1).InfoS(
+		logger.V(1).Info(
 			"Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers",
 		)
 
@@ -260,6 +261,7 @@ func evictPods(
 	usageClient usageClient,
 	maxNoOfPodsToEvictPerNode *uint,
 ) error {
+	logger := klog.FromContext(ctx)
 	// preemptive check to see if we should continue evicting pods.
 	if !continueEviction(nodeInfo, totalAvailableUsage) {
 		return nil
@@ -274,7 +276,7 @@ func evictPods(
 	var evictionCounter uint = 0
 	for _, pod := range inputPods {
 		if maxNoOfPodsToEvictPerNode != nil && evictionCounter >= *maxNoOfPodsToEvictPerNode {
-			klog.V(3).InfoS(
+			logger.V(3).Info(
 				"Max number of evictions per node per plugin reached",
 				"limit", *maxNoOfPodsToEvictPerNode,
 			)
@@ -282,7 +284,7 @@ func evictPods(
 		}
 
 		if !utils.PodToleratesTaints(pod, destinationTaints) {
-			klog.V(3).InfoS(
+			logger.V(3).Info(
 				"Skipping eviction for pod, doesn't tolerate node taint",
 				"pod", klog.KObj(pod),
 			)
@@ -297,7 +299,7 @@ func evictPods(
 			WithoutNamespaces(excludedNamespaces).
 			BuildFilterFunc()
 		if err != nil {
-			klog.ErrorS(err, "could not build preEvictionFilter with namespace exclusion")
+			logger.Error(err, "could not build preEvictionFilter with namespace exclusion")
 			continue
 		}
 
@@ -311,9 +313,8 @@ func evictPods(
 		podUsage, err := usageClient.podUsage(pod)
 		if err != nil {
 			if _, ok := err.(*notSupportedError); !ok {
-				klog.Errorf(
-					"unable to get pod usage for %v/%v: %v",
-					pod.Namespace, pod.Name, err,
+				logger.Error(err,
+					"unable to get pod usage", "pod", klog.KObj(pod),
 				)
 				continue
 			}
@@ -325,18 +326,18 @@ func evictPods(
 			case *evictions.EvictionNodeLimitError, *evictions.EvictionTotalLimitError:
 				return err
 			default:
-				klog.Errorf("eviction failed: %v", err)
+				logger.Error(err, "eviction failed")
 				continue
 			}
 		}
 
 		if maxNoOfPodsToEvictPerNode == nil && unconstrainedResourceEviction {
-			klog.V(3).InfoS("Currently, only a single pod eviction is allowed")
+			logger.V(3).Info("Currently, only a single pod eviction is allowed")
 			break
 		}
 
 		evictionCounter++
-		klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
+		logger.V(3).Info("Evicted pods", "pod", klog.KObj(pod))
 		if unconstrainedResourceEviction {
 			continue
 		}
@@ -345,7 +346,7 @@ func evictPods(
 
 		keysAndValues := []any{"node", nodeInfo.node.Name}
 		keysAndValues = append(keysAndValues, usageToKeysAndValues(nodeInfo.usage)...)
-		klog.V(3).InfoS("Updated node usage", keysAndValues...)
+		logger.V(3).Info("Updated node usage", keysAndValues...)
 
 		// make sure we should continue evicting pods.
 		if !continueEviction(nodeInfo, totalAvailableUsage) {
 
@@ -264,12 +264,13 @@ func (client *prometheusUsageClient) podUsage(pod *v1.Pod) (map[v1.ResourceName]
 }
 
 func NodeUsageFromPrometheusMetrics(ctx context.Context, promClient promapi.Client, promQuery string) (map[string]map[v1.ResourceName]*resource.Quantity, error) {
+	logger := klog.FromContext(ctx)
 	results, warnings, err := promv1.NewAPI(promClient).Query(ctx, promQuery, time.Now())
 	if err != nil {
 		return nil, fmt.Errorf("unable to capture prometheus metrics: %v", err)
 	}
 	if len(warnings) > 0 {
-		klog.Infof("prometheus metrics warnings: %v", warnings)
+		logger.Info("prometheus metrics warnings: %v", warnings)
 	}
 
 	if results.Type() != model.ValVector {