mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Refactor - Modify the common functions so they can be reused by high utilization

Hanu
2021-06-01 21:31:11 +08:00
parent 6e71068f4f
commit 2f18864fa5
3 changed files with 50 additions and 42 deletions
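
The heart of the refactor, visible in the hunks below, is that the shared eviction helper no longer hard-codes LowNodeUtilization's stopping rule or eviction reason: callers now pass a strategy name and a continueEvictionCond callback. A signature-level sketch of the new surface, reconstructed from the diff; it is not a standalone file, since it references types defined elsewhere in the descheduler's strategy code (NodeUsage, evictions.PodEvictor):

// continueEvictionCond lets each strategy decide whether the shared loop
// should keep evicting from a node, e.g. stop once the node drops below
// its target utilization or the movable capacity is exhausted.
type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool

// evictPodsFromSourceNodes (formerly evictPodsFromTargetNodes) takes the
// strategy name, used as the eviction reason, plus the strategy's continue
// condition, instead of hard-coding LowNodeUtilization's behaviour.
func evictPodsFromSourceNodes(
    ctx context.Context,
    sourceNodes, destinationNodes []NodeUsage,
    podEvictor *evictions.PodEvictor,
    podFilter func(pod *v1.Pod) bool,
    resourceNames []v1.ResourceName,
    strategy string,
    continueEviction continueEvictionCond,
) {
    // body unchanged apart from the renames; see the last file below
}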

View File

@@ -20,6 +20,7 @@ import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
@@ -33,7 +34,7 @@ import (
// to calculate nodes' utilization and not the actual resource usage.
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if err := validateLowNodeUtilizationParams(strategy.Params); err != nil {
if err := validateNodeUtilizationParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
return
}
@@ -50,7 +51,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
klog.ErrorS(err, "LowNodeUtilization config is not valid")
return
}
@@ -69,7 +70,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
}
resourceNames := getResourceNames(thresholds)
lowNodes, targetNodes := classifyNodes(
lowNodes, sourceNodes := classifyNodes(
getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames),
// The node has to be schedulable (to be able to move workload there)
func(node *v1.Node, usage NodeUsage) bool {
@@ -110,7 +111,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
}
}
klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(targetNodes))
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
@@ -127,26 +128,42 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
return
}
if len(targetNodes) == 0 {
if len(sourceNodes) == 0 {
klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
return
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
evictPodsFromTargetNodes(
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
if !isNodeAboveTargetUtilization(nodeUsage) {
return false
}
for name := range totalAvailableUsage {
if totalAvailableUsage[name].CmpInt64(0) < 1 {
return false
}
}
return true
}
evictPodsFromSourceNodes(
ctx,
targetNodes,
sourceNodes,
lowNodes,
podEvictor,
evictable.IsEvictable,
resourceNames)
resourceNames,
"LowNodeUtilization",
continueEvictionCond)
klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
}
// validateStrategyConfig checks if the strategy's config is valid
func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
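
The closure in the hunk above is what makes the helper reusable: the stopping rule now lives at the call site, and the strategy name travels with it as the eviction reason. A hypothetical sketch of how another strategy, the commit message's high-utilization use case, might wire itself up; the variable names, someStrategySpecificCheck, and the "HighNodeUtilization" string are illustrative placeholders, not code from this commit:

// Hypothetical caller: reuse the shared helper with a different stopping rule.
continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
    for name := range totalAvailableUsage {
        if totalAvailableUsage[name].CmpInt64(0) < 1 {
            return false // no movable capacity left for this resource
        }
    }
    return someStrategySpecificCheck(nodeUsage) // placeholder for the strategy's own rule
}

evictPodsFromSourceNodes(
    ctx,
    sourceNodes,
    destinationNodes,
    podEvictor,
    evictable.IsEvictable,
    resourceNames,
    "HighNodeUtilization", // becomes the reason recorded for each eviction
    continueEvictionCond)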

View File

@@ -818,7 +818,7 @@ func TestLowNodeUtilization(t *testing.T) {
}
}
func TestValidateStrategyConfig(t *testing.T) {
func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) {
tests := []struct {
name string
thresholds api.ResourceThresholds
@@ -958,7 +958,7 @@ func TestValidateStrategyConfig(t *testing.T) {
}
for _, testCase := range tests {
validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
@@ -972,9 +972,7 @@ func TestValidateStrategyConfig(t *testing.T) {
}
}
func TestWithTaints(t *testing.T) {
func TestLowNodeUtilizationWithTaints(t *testing.T) {
ctx := context.Background()
strategy := api.DeschedulerStrategy{
Enabled: true,

View File

@@ -40,6 +40,8 @@ type NodeUsage struct {
highResourceThreshold map[v1.ResourceName]*resource.Quantity
}
type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
@@ -50,7 +52,7 @@ const (
MaxResourcePercentage = 100
)
func validateLowNodeUtilizationParams(params *api.StrategyParameters) error {
func validateNodeUtilizationParams(params *api.StrategyParameters) error {
if params == nil || params.NodeResourceUtilizationThresholds == nil {
return fmt.Errorf("NodeResourceUtilizationThresholds not set")
}
@@ -172,18 +174,20 @@ func classifyNodes(
return lowNodes, highNodes
}
// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(
func evictPodsFromSourceNodes(
ctx context.Context,
targetNodes, lowNodes []NodeUsage,
sourceNodes, destinationNodes []NodeUsage,
podEvictor *evictions.PodEvictor,
podFilter func(pod *v1.Pod) bool,
resourceNames []v1.ResourceName,
strategy string,
continueEviction continueEvictionCond,
) {
sortNodesByUsage(targetNodes)
sortNodesByUsage(sourceNodes)
// upper bound on total number of pods/cpu/memory and optional extended resources to be moved
totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
@@ -192,9 +196,9 @@ func evictPodsFromTargetNodes(
v1.ResourceMemory: {},
}
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
var taintsOfDestinationNodes = make(map[string][]v1.Taint, len(destinationNodes))
for _, node := range destinationNodes {
taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints
for _, name := range resourceNames {
if _, ok := totalAvailableUsage[name]; !ok {
@@ -218,7 +222,7 @@ func evictPodsFromTargetNodes(
}
klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)
for _, node := range targetNodes {
for _, node := range sourceNodes {
klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
@@ -232,7 +236,7 @@ func evictPodsFromTargetNodes(
klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfLowNodes, podEvictor)
evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction)
klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage)
}
}
@@ -244,29 +248,18 @@ func evictPods(
totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
strategy string,
continueEviction continueEvictionCond,
) {
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
continueCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
if !isNodeAboveTargetUtilization(nodeUsage) {
return false
}
for name := range totalAvailableUsage {
if totalAvailableUsage[name].CmpInt64(0) < 1 {
return false
}
}
return true
}
if continueCond(nodeUsage, totalAvailableUsage) {
if continueEviction(nodeUsage, totalAvailableUsage) {
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
continue
}
success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, "LowNodeUtilization")
success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
if err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
@@ -299,8 +292,8 @@ func evictPods(
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
if !continueCond(nodeUsage, totalAvailableUsage) {
// check if pods can be still evicted
if !continueEviction(nodeUsage, totalAvailableUsage) {
break
}
}
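
For readers who want the control flow in isolation: the refactored evictPods evicts one pod at a time, updates its bookkeeping, and re-asks the strategy-supplied condition whether to continue. A minimal, self-contained Go sketch of that pattern; the types and numbers are simplified stand-ins, not the descheduler's real API:

package main

import "fmt"

// nodeUsage is a simplified stand-in for the descheduler's NodeUsage.
type nodeUsage struct {
    name string
    cpu  int64 // current CPU usage on the node, arbitrary units
}

// continueEvictionCond mirrors the callback introduced by this commit:
// the strategy decides whether the shared loop should keep evicting.
type continueEvictionCond func(usage nodeUsage, availableCPU int64) bool

// evictPods is a toy version of the shared helper: evict pods one by one,
// update usage and remaining capacity, and re-check the condition each time.
func evictPods(pods []string, usage *nodeUsage, availableCPU *int64, strategy string, cont continueEvictionCond) {
    if !cont(*usage, *availableCPU) {
        return
    }
    for _, pod := range pods {
        fmt.Printf("[%s] evicting %s from %s\n", strategy, pod, usage.name)
        usage.cpu -= 10     // pretend each evicted pod frees 10 units on the source node
        *availableCPU -= 10 // and consumes 10 units of destination capacity
        if !cont(*usage, *availableCPU) {
            break // balanced enough, or no destination capacity left
        }
    }
}

func main() {
    // LowNodeUtilization-style rule: keep going while the node is above the
    // target (80 units here) and some destination capacity remains.
    lowNodeCond := func(u nodeUsage, avail int64) bool {
        return u.cpu > 80 && avail > 0
    }
    usage := nodeUsage{name: "node-a", cpu: 100}
    available := int64(30)
    evictPods([]string{"pod-1", "pod-2", "pod-3", "pod-4"}, &usage, &available, "LowNodeUtilization", lowNodeCond)
    fmt.Printf("final usage on %s: %d, remaining capacity: %d\n", usage.name, usage.cpu, available)
}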