Mirror of https://github.com/kubernetes-sigs/descheduler.git
lownodeutilization: evictionLimits to limit the evictions per plugin
In some cases it might be useful to limit how many evictions can be performed per domain, to avoid burning the whole per-descheduling-cycle budget. Limiting the number of evictions per node is also a prerequisite for evicting pods whose usage can't be easily subtracted from the overall node resource usage to predict the final usage. E.g. when a pod is evicted due to high PSI pressure, which takes into account many factors that can't be fully captured by the current predictive resource model.
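For orientation, here is a minimal sketch of how the new knob would be set when building the plugin arguments. The field names (EvictionLimits, Node) and the ptr.To[uint] helper come straight from this diff; the surrounding literal and its threshold values are illustrative only:

    // Illustrative only: cap evictions at 2 pods per node for this plugin,
    // regardless of how much usage still needs to be moved off the node.
    args := &LowNodeUtilizationArgs{
        Thresholds: api.ResourceThresholds{
            v1.ResourceCPU:  30,
            v1.ResourcePods: 30,
        },
        TargetThresholds: api.ResourceThresholds{
            v1.ResourceCPU:  50,
            v1.ResourcePods: 50,
        },
        EvictionLimits: &api.EvictionLimits{
            Node: ptr.To[uint](2), // nil means no per-node limit
        },
    }

Leaving EvictionLimits (or its Node field) nil preserves the old behaviour: no per-node cap.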
@@ -161,6 +161,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr
         h.resourceNames,
         continueEvictionCond,
         h.usageClient,
+        nil,
     )

     return nil
@@ -183,6 +183,11 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
     // Sort the nodes by the usage in descending order
     sortNodesByUsage(sourceNodes, false)

+    var nodeLimit *uint
+    if l.args.EvictionLimits != nil {
+        nodeLimit = l.args.EvictionLimits.Node
+    }
+
     evictPodsFromSourceNodes(
         ctx,
         l.args.EvictableNamespaces,
@@ -194,6 +199,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
         l.resourceNames,
         continueEvictionCond,
         l.usageClient,
+        nodeLimit,
     )

     return nil
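Note the asymmetry between the two Balance hunks above: HighNodeUtilization passes nil for the new trailing argument, keeping its previous unlimited behaviour, while LowNodeUtilization threads the configured EvictionLimits.Node value through as nodeLimit.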
@@ -31,6 +31,7 @@ import (
     core "k8s.io/client-go/testing"
     "k8s.io/metrics/pkg/apis/metrics/v1beta1"
     fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
+    "k8s.io/utils/ptr"

     "sigs.k8s.io/descheduler/pkg/api"
     "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -63,6 +64,7 @@ func TestLowNodeUtilization(t *testing.T) {
         expectedPodsWithMetricsEvicted uint
         evictedPods                    []string
         evictableNamespaces            *api.Namespaces
+        evictionLimits                 *api.EvictionLimits
     }{
         {
             name: "no evictable pods",
@@ -1122,6 +1124,72 @@ func TestLowNodeUtilization(t *testing.T) {
             expectedPodsEvicted:            3,
             expectedPodsWithMetricsEvicted: 2,
         },
+        {
+            name: "without priorities with node eviction limit",
+            thresholds: api.ResourceThresholds{
+                v1.ResourceCPU:  30,
+                v1.ResourcePods: 30,
+            },
+            targetThresholds: api.ResourceThresholds{
+                v1.ResourceCPU:  50,
+                v1.ResourcePods: 50,
+            },
+            evictionLimits: &api.EvictionLimits{
+                Node: ptr.To[uint](2),
+            },
+            nodes: []*v1.Node{
+                test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
+                test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
+                test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
+            },
+            pods: []*v1.Pod{
+                test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
+                test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
+                test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
+                test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
+                test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
+                // These won't be evicted.
+                test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
+                test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
+                    // A pod with local storage.
+                    test.SetNormalOwnerRef(pod)
+                    pod.Spec.Volumes = []v1.Volume{
+                        {
+                            Name: "sample",
+                            VolumeSource: v1.VolumeSource{
+                                HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+                                EmptyDir: &v1.EmptyDirVolumeSource{
+                                    SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
+                                },
+                            },
+                        },
+                    }
+                    // A Mirror Pod.
+                    pod.Annotations = test.GetMirrorPodAnnotation()
+                }),
+                test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
+                    // A Critical Pod.
+                    pod.Namespace = "kube-system"
+                    priority := utils.SystemCriticalPriority
+                    pod.Spec.Priority = &priority
+                }),
+                test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
+            },
+            nodemetricses: []*v1beta1.NodeMetrics{
+                test.BuildNodeMetrics(n1NodeName, 3201, 0),
+                test.BuildNodeMetrics(n2NodeName, 401, 0),
+                test.BuildNodeMetrics(n3NodeName, 11, 0),
+            },
+            podmetricses: []*v1beta1.PodMetrics{
+                test.BuildPodMetrics("p1", 401, 0),
+                test.BuildPodMetrics("p2", 401, 0),
+                test.BuildPodMetrics("p3", 401, 0),
+                test.BuildPodMetrics("p4", 401, 0),
+                test.BuildPodMetrics("p5", 401, 0),
+            },
+            expectedPodsEvicted:            2,
+            expectedPodsWithMetricsEvicted: 2,
+        },
     }

     for _, tc := range testCases {
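For reference, the expectation in this new case: n1 carries five evictable ReplicaSet-owned pods (p1-p5), while p6-p8 are excluded for the usual reasons (DaemonSet-owned, local storage/mirror pod, critical). Without the limit more than two of them could be moved; with EvictionLimits.Node set to 2 the test expects exactly 2 evictions.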
@@ -1193,6 +1261,7 @@ func TestLowNodeUtilization(t *testing.T) {
                 Thresholds:             tc.thresholds,
                 TargetThresholds:       tc.targetThresholds,
                 UseDeviationThresholds: tc.useDeviationThresholds,
+                EvictionLimits:         tc.evictionLimits,
                 EvictableNamespaces:    tc.evictableNamespaces,
                 MetricsUtilization: MetricsUtilization{
                     MetricsServer: metricsEnabled,
@@ -239,6 +239,7 @@ func evictPodsFromSourceNodes(
     resourceNames []v1.ResourceName,
     continueEviction continueEvictionCond,
     usageClient usageClient,
+    maxNoOfPodsToEvictPerNode *uint,
 ) {
     // upper bound on total number of pods/cpu/memory and optional extended resources to be moved
     totalAvailableUsage := api.ReferencedResourceList{}
@@ -280,7 +281,7 @@ func evictPodsFromSourceNodes(
         klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
         // sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
         podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
-        err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient)
+        err := evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, evictOptions, continueEviction, usageClient, maxNoOfPodsToEvictPerNode)
         if err != nil {
             switch err.(type) {
             case *evictions.EvictionTotalLimitError:
@@ -302,14 +303,20 @@ func evictPods(
     evictOptions evictions.EvictOptions,
     continueEviction continueEvictionCond,
     usageClient usageClient,
+    maxNoOfPodsToEvictPerNode *uint,
 ) error {
     var excludedNamespaces sets.Set[string]
     if evictableNamespaces != nil {
         excludedNamespaces = sets.New(evictableNamespaces.Exclude...)
     }

+    var evictionCounter uint = 0
     if continueEviction(nodeInfo, totalAvailableUsage) {
         for _, pod := range inputPods {
+            if maxNoOfPodsToEvictPerNode != nil && evictionCounter >= *maxNoOfPodsToEvictPerNode {
+                klog.V(3).InfoS("Max number of evictions per node per plugin reached", "limit", *maxNoOfPodsToEvictPerNode)
+                break
+            }
             if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
                 klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
                 continue
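The enforcement pattern is compact enough to read in isolation. Below is a self-contained sketch of the same optional-limit loop; evictUpTo and its arguments are hypothetical names of mine, not the descheduler's:

    package main

    import "fmt"

    // evictUpTo mirrors the check added above: a nil limit means "no cap";
    // otherwise the loop stops once the counter reaches the limit. Only
    // successful evictions advance the counter, matching the diff.
    func evictUpTo(limit *uint, candidates []string, evict func(string) error) uint {
        var counter uint
        for _, c := range candidates {
            if limit != nil && counter >= *limit {
                break // per-node budget for this plugin is spent
            }
            if err := evict(c); err == nil {
                counter++
            }
        }
        return counter
    }

    func main() {
        limit := uint(2)
        n := evictUpTo(&limit, []string{"p1", "p2", "p3", "p4", "p5"},
            func(string) error { return nil })
        fmt.Println(n) // 2
    }

Note that failed evictions do not advance the counter; that corresponds to the evictionCounter++ placed under err == nil in the next hunk.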
@@ -334,6 +341,7 @@ func evictPods(
             }
             err = podEvictor.Evict(ctx, pod, evictOptions)
             if err == nil {
+                evictionCounter++
                 klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))

                 for name := range totalAvailableUsage {
@@ -34,6 +34,9 @@ type LowNodeUtilizationArgs struct {
     // considered while considering resources used by pods
     // but then filtered out before eviction
     EvictableNamespaces *api.Namespaces `json:"evictableNamespaces,omitempty"`
+
+    // evictionLimits limits the number of evictions per domain. E.g. node, namespace, total.
+    EvictionLimits *api.EvictionLimits `json:"evictionLimits,omitempty"`
 }

 // +k8s:deepcopy-gen=true
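api.EvictionLimits itself is defined elsewhere in the tree and is not part of this diff. Based on how it is used here, its relevant shape is presumably along these lines (the doc comment above hints that further domains such as namespace or total are anticipated):

    // Assumed minimal shape, inferred from usage in this commit.
    type EvictionLimits struct {
        // Node caps the number of evictions per node, per plugin,
        // per descheduling cycle. nil means unlimited.
        Node *uint `json:"node,omitempty"`
    }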
@@ -88,6 +88,11 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) {
         *out = new(api.Namespaces)
         (*in).DeepCopyInto(*out)
     }
+    if in.EvictionLimits != nil {
+        in, out := &in.EvictionLimits, &out.EvictionLimits
+        *out = new(api.EvictionLimits)
+        (*in).DeepCopyInto(*out)
+    }
     return
 }
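The DeepCopyInto change is presumably regenerated deepcopy-gen output (note the // +k8s:deepcopy-gen=true marker in the args file above): the new *api.EvictionLimits field must be deep-copied so that copies of the args don't share the underlying pointer.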