diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index 86cdc4568..07259a3e3 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -18,6 +18,7 @@ package strategies import ( "context" + "fmt" "reflect" "sort" "strings" @@ -34,6 +35,17 @@ import ( "sigs.k8s.io/descheduler/pkg/utils" ) +func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error { + if params == nil { + return nil + } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } + + return nil +} + // RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node. // A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same // namespace, and have at least one container with the same image. @@ -45,10 +57,20 @@ func RemoveDuplicatePods( nodes []*v1.Node, podEvictor *evictions.PodEvictor, ) { + if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil { + klog.V(1).Info(err) + return + } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } + for _, node := range nodes { klog.V(1).Infof("Processing node: %#v", node.Name) pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) + return podEvictor.IsEvictable(pod, thresholdPriority) })) if err != nil { klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err) diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go index 1042212e5..2a58101bf 100644 --- 
a/pkg/descheduler/strategies/lownodeutilization.go +++ b/pkg/descheduler/strategies/lownodeutilization.go @@ -50,13 +50,28 @@ const ( MaxResourcePercentage = 100 ) +func validateLowNodeUtilizationParams(params *api.StrategyParameters) error { + if params == nil || params.NodeResourceUtilizationThresholds == nil { + return fmt.Errorf("NodeResourceUtilizationThresholds not set") + } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } + + return nil +} + // LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used // to calculate nodes' utilization and not the actual resource usage. func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) { - // todo: move to config validation? // TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params? 
- if strategy.Params == nil || strategy.Params.NodeResourceUtilizationThresholds == nil { - klog.V(1).Infof("NodeResourceUtilizationThresholds not set") + if err := validateLowNodeUtilizationParams(strategy.Params); err != nil { + klog.V(1).Info(err) + return + } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) return } @@ -116,7 +131,8 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg targetNodes, lowNodes, targetThresholds, - podEvictor) + podEvictor, + thresholdPriority) klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted()) } @@ -196,6 +212,7 @@ func evictPodsFromTargetNodes( targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, podEvictor *evictions.PodEvictor, + thresholdPriority int32, ) { sortNodesByUsage(targetNodes) @@ -232,7 +249,7 @@ func evictPodsFromTargetNodes( } klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage) - nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor) + nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor, thresholdPriority) klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, removablePods:%v", len(node.allPods), len(nonRemovablePods), len(removablePods)) if len(removablePods) == 0 { @@ -393,11 +410,11 @@ func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds { } } -func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor) ([]*v1.Pod, []*v1.Pod) { +func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor, thresholdPriority int32) ([]*v1.Pod, []*v1.Pod) { var nonRemovablePods, removablePods []*v1.Pod for _, pod := range pods { - if !evictor.IsEvictable(pod, utils.SystemCriticalPriority) { + if !evictor.IsEvictable(pod, thresholdPriority) { nonRemovablePods = append(nonRemovablePods, pod) } else { 
removablePods = append(removablePods, pod) diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index 6da8bc240..e4d5eac5a 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -39,6 +39,9 @@ func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) err if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return fmt.Errorf("only one of Include/Exclude namespaces can be set") } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } return nil } @@ -49,6 +52,12 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter klog.V(1).Info(err) return } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } + for _, nodeAffinity := range strategy.Params.NodeAffinityType { klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity) @@ -62,7 +71,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) && + return podEvictor.IsEvictable(pod, thresholdPriority) && !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) }), diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index a1b34c61b..2a94e0e5e 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -39,6 +39,9 @@ func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters) if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return fmt.Errorf("only one 
of Include/Exclude namespaces can be set") } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } return nil } @@ -53,6 +56,11 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa if strategy.Params != nil { namespaces = strategy.Params.Namespaces } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) @@ -61,7 +69,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) + return podEvictor.IsEvictable(pod, thresholdPriority) }), podutil.WithNamespaces(namespaces.Include), podutil.WithoutNamespaces(namespaces.Exclude), diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index 759345b5b..143b310a1 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -40,6 +40,9 @@ func validateRemovePodsViolatingInterPodAntiAffinityParams(params *api.StrategyP if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return fmt.Errorf("only one of Include/Exclude namespaces can be set") } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } return nil } @@ -50,6 +53,11 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients if strategy.Params != nil { namespaces = strategy.Params.Namespaces } + thresholdPriority, err := 
utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } for _, node := range nodes { klog.V(1).Infof("Processing node: %#v\n", node.Name) @@ -58,7 +66,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) + return podEvictor.IsEvictable(pod, thresholdPriority) }), podutil.WithNamespaces(namespaces.Include), podutil.WithoutNamespaces(namespaces.Exclude), diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index 828ccfa12..16cdb09a2 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -40,6 +40,9 @@ func validatePodLifeTimeParams(params *api.StrategyParameters) error { if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return fmt.Errorf("only one of Include/Exclude namespaces can be set") } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } return nil } @@ -50,11 +53,16 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D klog.V(1).Info(err) return } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } for _, node := range nodes { klog.V(1).Infof("Processing node: %#v", node.Name) - pods := listOldPodsOnNode(ctx, client, node, strategy.Params, podEvictor) + pods := listOldPodsOnNode(ctx, client, node, strategy.Params, podEvictor, thresholdPriority) for _, pod := range pods { success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime") if 
success { @@ -70,13 +78,13 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D } } -func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, params *api.StrategyParameters, podEvictor *evictions.PodEvictor) []*v1.Pod { +func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, params *api.StrategyParameters, podEvictor *evictions.PodEvictor, thresholdPriority int32) []*v1.Pod { pods, err := podutil.ListPodsOnANode( ctx, client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) + return podEvictor.IsEvictable(pod, thresholdPriority) }), podutil.WithNamespaces(params.Namespaces.Include), podutil.WithoutNamespaces(params.Namespaces.Exclude), diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index dac1d704f..81caf0b01 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -39,6 +39,9 @@ func validateRemovePodsHavingTooManyRestartsParams(params *api.StrategyParameter if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return fmt.Errorf("only one of Include/Exclude namespaces can be set") } + if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" { + return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set") + } return nil } @@ -51,6 +54,12 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter klog.V(1).Info(err) return } + thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params) + if err != nil { + klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err) + return + } + for _, node := range nodes { klog.V(1).Infof("Processing node: %s", node.Name) pods, err := podutil.ListPodsOnANode( @@ -58,7 +67,7 @@ func 
RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter client, node, podutil.WithFilter(func(pod *v1.Pod) bool { - return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority) + return podEvictor.IsEvictable(pod, thresholdPriority) }), podutil.WithNamespaces(strategy.Params.Namespaces.Include), podutil.WithoutNamespaces(strategy.Params.Namespaces.Exclude), diff --git a/pkg/utils/priority.go b/pkg/utils/priority.go index a5a5737cf..709273dfc 100644 --- a/pkg/utils/priority.go +++ b/pkg/utils/priority.go @@ -1,9 +1,15 @@ package utils import ( + "context" + "fmt" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" + clientset "k8s.io/client-go/kubernetes" + "sigs.k8s.io/descheduler/pkg/api" ) const SystemCriticalPriority = 2 * int32(1000000000) @@ -33,3 +39,36 @@ func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, se } return true } + +// GetPriorityFromPriorityClass gets priority from the given priority class. +// If no priority class is provided, it will return SystemCriticalPriority by default. +func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) { + if name != "" { + priorityClass, err := client.SchedulingV1().PriorityClasses().Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return 0, err + } + return priorityClass.Value, nil + } + return SystemCriticalPriority, nil +} + +// GetPriorityFromStrategyParams gets priority from the given StrategyParameters. +// It will return SystemCriticalPriority by default. 
+func GetPriorityFromStrategyParams(ctx context.Context, client clientset.Interface, params *api.StrategyParameters) (priority int32, err error) { + if params == nil { + return SystemCriticalPriority, nil + } + if params.ThresholdPriority != nil { + priority = *params.ThresholdPriority + } else { + priority, err = GetPriorityFromPriorityClass(ctx, client, params.ThresholdPriorityClassName) + if err != nil { + return + } + } + if priority > SystemCriticalPriority { + return 0, fmt.Errorf("priority threshold can't be greater than %d", SystemCriticalPriority) + } + return +}