mirror of
https://github.com/kubernetes-sigs/descheduler.git
Refactor - Modify the common functions to be used by high utilisation
@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
 
@@ -33,7 +34,7 @@ import (
 // to calculate nodes' utilization and not the actual resource usage.
 func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
 	// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
-	if err := validateLowNodeUtilizationParams(strategy.Params); err != nil {
+	if err := validateNodeUtilizationParams(strategy.Params); err != nil {
 		klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
 		return
 	}
@@ -50,7 +51,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 
 	thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
 	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-	if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
+	if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
 		klog.ErrorS(err, "LowNodeUtilization config is not valid")
 		return
 	}
@@ -69,7 +70,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 	}
 	resourceNames := getResourceNames(thresholds)
 
-	lowNodes, targetNodes := classifyNodes(
+	lowNodes, sourceNodes := classifyNodes(
 		getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames),
 		// The node has to be schedulable (to be able to move workload there)
 		func(node *v1.Node, usage NodeUsage) bool {
@@ -110,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 		}
 	}
 	klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...)
-	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes))
+	klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
 
 	if len(lowNodes) == 0 {
 		klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
@@ -127,26 +128,42 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
 		return
 	}
 
-	if len(targetNodes) == 0 {
+	if len(sourceNodes) == 0 {
 		klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
 		return
 	}
 
 	evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
 
-	evictPodsFromTargetNodes(
+	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
+	continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
+		if !isNodeAboveTargetUtilization(nodeUsage) {
+			return false
+		}
+		for name := range totalAvailableUsage {
+			if totalAvailableUsage[name].CmpInt64(0) < 1 {
+				return false
+			}
+		}
+
+		return true
+	}
+
+	evictPodsFromSourceNodes(
 		ctx,
-		targetNodes,
+		sourceNodes,
 		lowNodes,
 		podEvictor,
 		evictable.IsEvictable,
-		resourceNames)
+		resourceNames,
+		"LowNodeUtilization",
+		continueEvictionCond)
 
 	klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
 }
 
-// validateStrategyConfig checks if the strategy's config is valid
-func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
+// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
+func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
 	// validate thresholds and targetThresholds config
 	if err := validateThresholds(thresholds); err != nil {
 		return fmt.Errorf("thresholds config is not valid: %v", err)
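The closure added above keeps evicting only while the node is still above its target utilization and every per-resource eviction budget in totalAvailableUsage is still positive; the CmpInt64(0) < 1 comparison is how "budget exhausted" reads. Below is a small, self-contained sketch of that semantics, not code from this repository; the values are illustrative and only the k8s.io/apimachinery resource package is assumed.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Remaining CPU that may still be moved off an overutilized node.
	remaining := resource.MustParse("500m")

	// Suppose a 1-CPU pod was just evicted, so the budget goes negative.
	remaining.Sub(resource.MustParse("1"))

	// CmpInt64 returns -1, 0, or 1, so "< 1" means "less than or equal to zero":
	// the budget is exhausted and the continue-eviction condition returns false.
	fmt.Println(remaining.CmpInt64(0) < 1) // true
}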
@@ -818,7 +818,7 @@ func TestLowNodeUtilization(t *testing.T) {
 	}
 }
 
-func TestValidateStrategyConfig(t *testing.T) {
+func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) {
 	tests := []struct {
 		name       string
 		thresholds api.ResourceThresholds
@@ -958,7 +958,7 @@ func TestValidateStrategyConfig(t *testing.T) {
 	}
 
 	for _, testCase := range tests {
-		validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
+		validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
 
 		if validateErr == nil || testCase.errInfo == nil {
 			if validateErr != testCase.errInfo {
@@ -972,9 +972,7 @@ func TestValidateStrategyConfig(t *testing.T) {
 		}
 	}
 
-
-
-func TestWithTaints(t *testing.T) {
+func TestLowNodeUtilizationWithTaints(t *testing.T) {
 	ctx := context.Background()
 	strategy := api.DeschedulerStrategy{
 		Enabled: true,
@@ -40,6 +40,8 @@ type NodeUsage struct {
 	highResourceThreshold map[v1.ResourceName]*resource.Quantity
 }
 
+type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
+
 // NodePodsMap is a set of (node, pods) pairs
 type NodePodsMap map[*v1.Node][]*v1.Pod
 
@@ -50,7 +52,7 @@ const (
 	MaxResourcePercentage = 100
 )
 
-func validateLowNodeUtilizationParams(params *api.StrategyParameters) error {
+func validateNodeUtilizationParams(params *api.StrategyParameters) error {
 	if params == nil || params.NodeResourceUtilizationThresholds == nil {
 		return fmt.Errorf("NodeResourceUtilizationThresholds not set")
 	}
@@ -172,18 +174,20 @@ func classifyNodes(
 	return lowNodes, highNodes
 }
 
-// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
+// evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not
 // evicts them based on QoS as fallback option.
 // TODO: @ravig Break this function into smaller functions.
-func evictPodsFromTargetNodes(
+func evictPodsFromSourceNodes(
 	ctx context.Context,
-	targetNodes, lowNodes []NodeUsage,
+	sourceNodes, destinationNodes []NodeUsage,
 	podEvictor *evictions.PodEvictor,
 	podFilter func(pod *v1.Pod) bool,
 	resourceNames []v1.ResourceName,
+	strategy string,
+	continueEviction continueEvictionCond,
 ) {
 
-	sortNodesByUsage(targetNodes)
+	sortNodesByUsage(sourceNodes)
 
 	// upper bound on total number of pods/cpu/memory and optional extended resources to be moved
 	totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
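The widened signature above is what lets another strategy drive the same eviction path: the caller now supplies its own strategy name (recorded on each eviction) and its own continue-eviction condition instead of hard-coded LowNodeUtilization behaviour. The following is a minimal, self-contained sketch of that shape only; every name in it (nodeUsage, evictFromNodes, the two conditions) is an illustrative stand-in, not the descheduler's actual types or API.

package main

import "fmt"

// nodeUsage is a stand-in for the package's NodeUsage struct.
type nodeUsage struct {
	name   string
	pods   int
	target int
}

// continueEvictionCond mirrors the hook type added by this commit: the shared
// loop asks it whether eviction from the current node should continue.
type continueEvictionCond func(u nodeUsage, remainingBudget int) bool

// evictFromNodes is a toy version of the shared eviction path: it is strategy
// agnostic and only reports which strategy requested each eviction.
func evictFromNodes(nodes []nodeUsage, budget int, strategy string, cont continueEvictionCond) {
	for i := range nodes {
		for budget > 0 && cont(nodes[i], budget) {
			nodes[i].pods--
			budget--
			fmt.Printf("%s: evicted a pod from %s, %d pods left\n", strategy, nodes[i].name, nodes[i].pods)
		}
	}
}

func main() {
	nodes := []nodeUsage{{name: "node-a", pods: 5, target: 3}}

	// LowNodeUtilization-style condition: evict while the node is above its target.
	lowCond := func(u nodeUsage, _ int) bool { return u.pods > u.target }
	evictFromNodes(nodes, 10, "LowNodeUtilization", lowCond)

	// A hypothetical HighNodeUtilization-style caller plugs a different condition
	// and name into the exact same loop; that reuse is the point of the refactor.
	highCond := func(u nodeUsage, remaining int) bool { return u.pods > 1 && remaining > 0 }
	evictFromNodes(nodes, 1, "HighNodeUtilization", highCond)
}

In the real package, LowNodeUtilization passes "LowNodeUtilization" together with the closure shown in the earlier hunk; a high-utilization strategy would pass its own pair.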
@@ -192,9 +196,9 @@ func evictPodsFromTargetNodes(
 		v1.ResourceMemory: {},
 	}
 
-	var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
-	for _, node := range lowNodes {
-		taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
+	var taintsOfDestinationNodes = make(map[string][]v1.Taint, len(destinationNodes))
+	for _, node := range destinationNodes {
+		taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints
 	}
 
 	for _, name := range resourceNames {
 		if _, ok := totalAvailableUsage[name]; !ok {
@@ -218,7 +222,7 @@
 	}
 	klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)
 
-	for _, node := range targetNodes {
+	for _, node := range sourceNodes {
 		klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
 
 		nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
@@ -232,7 +236,7 @@
 		klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
 		// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
 		podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
-		evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfLowNodes, podEvictor)
+		evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction)
 		klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage)
 	}
 }
@@ -244,29 +248,18 @@ func evictPods(
 	totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
 	taintsOfLowNodes map[string][]v1.Taint,
 	podEvictor *evictions.PodEvictor,
+	strategy string,
+	continueEviction continueEvictionCond,
 ) {
 
-	// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
-	continueCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
-		if !isNodeAboveTargetUtilization(nodeUsage) {
-			return false
-		}
-		for name := range totalAvailableUsage {
-			if totalAvailableUsage[name].CmpInt64(0) < 1 {
-				return false
-			}
-		}
-
-		return true
-	}
-	if continueCond(nodeUsage, totalAvailableUsage) {
+	if continueEviction(nodeUsage, totalAvailableUsage) {
 		for _, pod := range inputPods {
 			if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
 				klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
 				continue
 			}
 
-			success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, "LowNodeUtilization")
+			success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
 			if err != nil {
 				klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
 				break
@@ -299,8 +292,8 @@ func evictPods(
 			}
 
 			klog.V(3).InfoS("Updated node usage", keysAndValues...)
-			// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
-			if !continueCond(nodeUsage, totalAvailableUsage) {
+			// check if pods can be still evicted
+			if !continueEviction(nodeUsage, totalAvailableUsage) {
 				break
 			}
 		}