1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

lownodeutilization: evictionLimits to limit the evictions per plugin

In some cases it might be usefull to limit how many evictions per a
domain can be performed. To avoid burning the whole per descheduling
cycle budget. Limiting the number of evictions per node is a
prerequisite for evicting pods whose usage can't be easily subtracted
from overall node resource usage to predict the final usage. E.g. when a
pod is evicted due to high PSI pressure which takes into account many
factors which can be fully captured by the current predictive resource
model.
This commit is contained in:
Jan Chaloupka
2025-03-07 14:18:26 +01:00
parent 50dd3b8971
commit 5bf11813e6
9 changed files with 127 additions and 3 deletions

View File

@@ -64,6 +64,12 @@ type Namespaces struct {
Exclude []string `json:"exclude,omitempty"`
}
// EvictionLimits limits the number of evictions per domain. E.g. node, namespace, total.
type EvictionLimits struct {
// node restricts the maximum number of evictions per node
Node *uint `json:"node,omitempty"`
}
type (
Percentage float64
ResourceThresholds map[v1.ResourceName]Percentage

View File

@@ -112,6 +112,27 @@ func (in *DeschedulerProfile) DeepCopy() *DeschedulerProfile {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *EvictionLimits) DeepCopyInto(out *EvictionLimits) {
*out = *in
if in.Node != nil {
in, out := &in.Node, &out.Node
*out = new(uint)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EvictionLimits.
func (in *EvictionLimits) DeepCopy() *EvictionLimits {
if in == nil {
return nil
}
out := new(EvictionLimits)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetricsCollector) DeepCopyInto(out *MetricsCollector) {
*out = *in

View File

@@ -161,6 +161,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
h.resourceNames,
continueEvictionCond,
h.usageClient,
nil,
)
return nil

View File

@@ -183,6 +183,11 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
// Sort the nodes by the usage in descending order
sortNodesByUsage(sourceNodes, false)
var nodeLimit *uint
if l.args.EvictionLimits != nil {
nodeLimit = l.args.EvictionLimits.Node
}
evictPodsFromSourceNodes(
ctx,
l.args.EvictableNamespaces,
@@ -194,6 +199,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
l.resourceNames,
continueEvictionCond,
l.usageClient,
nodeLimit,
)
return nil

View File

@@ -31,6 +31,7 @@ import (
core "k8s.io/client-go/testing"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -63,6 +64,7 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsWithMetricsEvicted uint
evictedPods []string
evictableNamespaces *api.Namespaces
evictionLimits *api.EvictionLimits
}{
{
name: "no evictable pods",
@@ -1122,6 +1124,72 @@ func TestLowNodeUtilization(t *testing.T) {
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 2,
},
{
name: "without priorities with node eviction limit",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
evictionLimits: &api.EvictionLimits{
Node: ptr.To[uint](2),
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 2,
expectedPodsWithMetricsEvicted: 2,
},
}
for _, tc := range testCases {
@@ -1193,6 +1261,7 @@ func TestLowNodeUtilization(t *testing.T) {
Thresholds: tc.thresholds,
TargetThresholds: tc.targetThresholds,
UseDeviationThresholds: tc.useDeviationThresholds,
EvictionLimits: tc.evictionLimits,
EvictableNamespaces: tc.evictableNamespaces,
MetricsUtilization: MetricsUtilization{
MetricsServer: metricsEnabled,

View File

@@ -239,6 +239,7 @@ func evictPodsFromSourceNodes(
resourceNames []v1.ResourceName,
continueEviction continueEvictionCond,
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) {
// upper bound on total number of pods/cpu/memory and optional extended resources to be moved
totalAvailableUsage := api.ReferencedResourceList{}
@@ -280,7 +281,7 @@ func evictPodsFromSourceNodes(
klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient)
err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient, maxNoOfPodsToEvictPerNode)
if err != nil {
switch err.(type) {
case *evictions.EvictionTotalLimitError:
@@ -302,14 +303,20 @@ func evictPods(
evictOptions evictions.EvictOptions,
continueEviction continueEvictionCond,
usageClient usageClient,
maxNoOfPodsToEvictPerNode *uint,
) error {
var excludedNamespaces sets.Set[string]
if evictableNamespaces != nil {
excludedNamespaces = sets.New(evictableNamespaces.Exclude...)
}
var evictionCounter uint = 0
if continueEviction(nodeInfo, totalAvailableUsage) {
for _, pod := range inputPods {
if maxNoOfPodsToEvictPerNode != nil && evictionCounter >= *maxNoOfPodsToEvictPerNode {
klog.V(3).InfoS("Max number of evictions per node per plugin reached", "limit", *maxNoOfPodsToEvictPerNode)
break
}
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
continue
@@ -334,6 +341,7 @@ func evictPods(
}
err = podEvictor.Evict(ctx, pod, evictOptions)
if err == nil {
evictionCounter++
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
for name := range totalAvailableUsage {

View File

@@ -34,6 +34,9 @@ type LowNodeUtilizationArgs struct {
// considered while considering resources used by pods
// but then filtered out before eviction
EvictableNamespaces *api.Namespaces `json:"evictableNamespaces,omitempty"`
// evictionLimits limits the number of evictions per domain. E.g. node, namespace, total.
EvictionLimits *api.EvictionLimits `json:"evictionLimits,omitempty"`
}
// +k8s:deepcopy-gen=true

View File

@@ -88,6 +88,11 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) {
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.EvictionLimits != nil {
in, out := &in.EvictionLimits, &out.EvictionLimits
*out = new(api.EvictionLimits)
(*in).DeepCopyInto(*out)
}
return
}