1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-25 20:59:28 +01:00

Merge pull request #1642 from ingvagabund/unconstrainedevictions

lownodeutilization: evictionLimits to limit the evictions per plugin
This commit is contained in:
Kubernetes Prow Robot
2025-03-10 08:43:47 -07:00
committed by GitHub
9 changed files with 127 additions and 3 deletions

View File

@@ -300,6 +300,7 @@ See `metricsCollector` field at [Top Level configuration](#top-level-configurati
|`thresholds`|map(string:int)|
|`targetThresholds`|map(string:int)|
|`numberOfNodes`|int|
|`evictionLimits`|object|
|`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))|
|`metricsUtilization`|object|
|`metricsUtilization.metricsServer`|bool|
@@ -325,6 +326,8 @@ profiles:
"pods": 50
metricsUtilization:
metricsServer: true
evictionLimits:
node: 5
plugins:
balance:
enabled:
@@ -340,10 +343,12 @@ and will not be used to compute node's usage if it's not specified in `threshold
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
There are two more parameters associated with the `LowNodeUtilization` strategy, called `numberOfNodes` and `evictionLimits`.
The first parameter can be configured to activate the strategy only when the number of under utilized nodes
are above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
The second parameter is useful when the number of evictions per plugin per descheduling cycle needs to be limited.
The parameter currently allows limiting the number of evictions per node through the `node` field.
### HighNodeUtilization

View File

@@ -64,6 +64,12 @@ type Namespaces struct {
Exclude []string `json:"exclude,omitempty"`
}
// EvictionLimits limits the number of evictions per domain. E.g. node, namespace, total.
// The limits apply per plugin within a single descheduling cycle.
type EvictionLimits struct {
	// node restricts the maximum number of evictions per node; a nil value means no per-node limit
	Node *uint `json:"node,omitempty"`
}
type (
Percentage float64
ResourceThresholds map[v1.ResourceName]Percentage

View File

@@ -112,6 +112,27 @@ func (in *DeschedulerProfile) DeepCopy() *DeschedulerProfile {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *EvictionLimits) DeepCopyInto(out *EvictionLimits) {
	*out = *in
	// The shallow copy above aliases the Node pointer; replace it with a
	// freshly allocated copy so out does not share memory with in.
	if in.Node != nil {
		nodeCopy := *in.Node
		out.Node = &nodeCopy
	}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvictionLimits.
func (in *EvictionLimits) DeepCopy() *EvictionLimits {
if in == nil {
return nil
}
out := new(EvictionLimits)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetricsCollector) DeepCopyInto(out *MetricsCollector) {
*out = *in

View File

@@ -161,6 +161,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
h.resourceNames,
continueEvictionCond,
h.usageClient,
nil,
)
return nil

View File

@@ -183,6 +183,11 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
// Sort the nodes by the usage in descending order
sortNodesByUsage(sourceNodes, false)
var nodeLimit *uint
if l.args.EvictionLimits != nil {
nodeLimit = l.args.EvictionLimits.Node
}
evictPodsFromSourceNodes(
ctx,
l.args.EvictableNamespaces,
@@ -194,6 +199,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
l.resourceNames,
continueEvictionCond,
l.usageClient,
nodeLimit,
)
return nil

View File

@@ -31,6 +31,7 @@ import (
core "k8s.io/client-go/testing"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -63,6 +64,7 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsWithMetricsEvicted uint
evictedPods []string
evictableNamespaces *api.Namespaces
evictionLimits *api.EvictionLimits
}{
{
name: "no evictable pods",
@@ -1122,6 +1124,72 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 2,
},
{
name: "without priorities with node eviction limit",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
evictionLimits: &api.EvictionLimits{
Node: ptr.To[uint](2),
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 2,
expectedPodsWithMetricsEvicted: 2,
},
}
for _, tc := range testCases {
@@ -1193,6 +1261,7 @@ func TestLowNodeUtilization(t *testing.T) {
Thresholds: tc.thresholds,
TargetThresholds: tc.targetThresholds,
UseDeviationThresholds: tc.useDeviationThresholds,
EvictionLimits: tc.evictionLimits,
EvictableNamespaces: tc.evictableNamespaces,
MetricsUtilization: MetricsUtilization{
MetricsServer: metricsEnabled,

View File

@@ -239,6 +239,7 @@ func evictPodsFromSourceNodes(
resourceNames []v1.ResourceName,
continueEviction continueEvictionCond,
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) {
// upper bound on total number of pods/cpu/memory and optional extended resources to be moved
totalAvailableUsage := api.ReferencedResourceList{}
@@ -280,7 +281,7 @@ func evictPodsFromSourceNodes(
klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient)
err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient, maxNoOfPodsToEvictPerNode)
if err != nil {
switch err.(type) {
case *evictions.EvictionTotalLimitError:
@@ -302,14 +303,20 @@ func evictPods(
evictOptions evictions.EvictOptions,
continueEviction continueEvictionCond,
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) error {
var excludedNamespaces sets.Set[string]
if evictableNamespaces != nil {
excludedNamespaces = sets.New(evictableNamespaces.Exclude...)
}
var evictionCounter uint = 0
if continueEviction(nodeInfo, totalAvailableUsage) {
for _, pod := range inputPods {
if maxNoOfPodsToEvictPerNode != nil && evictionCounter >= *maxNoOfPodsToEvictPerNode {
klog.V(3).InfoS("Max number of evictions per node per plugin reached", "limit", *maxNoOfPodsToEvictPerNode)
break
}
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
continue
@@ -334,6 +341,7 @@ func evictPods(
}
err = podEvictor.Evict(ctx, pod, evictOptions)
if err == nil {
evictionCounter++
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
for name := range totalAvailableUsage {

View File

@@ -34,6 +34,9 @@ type LowNodeUtilizationArgs struct {
// considered while considering resources used by pods
// but then filtered out before eviction
EvictableNamespaces *api.Namespaces `json:"evictableNamespaces,omitempty"`
// evictionLimits limits the number of evictions per domain. E.g. node, namespace, total.
EvictionLimits *api.EvictionLimits `json:"evictionLimits,omitempty"`
}
// +k8s:deepcopy-gen=true

View File

@@ -88,6 +88,11 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) {
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.EvictionLimits != nil {
in, out := &in.EvictionLimits, &out.EvictionLimits
*out = new(api.EvictionLimits)
(*in).DeepCopyInto(*out)
}
return
}