
PodEvictor: turn an exceeded limit into an error

When checking whether the node limit has been exceeded, the pod eviction
never fails. As a result, the metric reported when a pod fails to be
evicted due to node limit constraints is effectively ignored.

Returning an error also allows plugins to react to other limits being
exceeded, e.g. the limit on the number of pods evicted per namespace.
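
As a rough, illustrative sketch (not part of this commit), the returned error lets a plugin's eviction loop stop early once the per-node limit is hit, mirroring the labeled-loop pattern used in the plugin diffs below. The evictor interface and helper names here are assumptions made only for this example:

package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

// evictor is the minimal interface this sketch needs; after this change the
// framework's Evictor handle satisfies it.
type evictor interface {
	Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error
}

// evictAll is an illustrative helper, not part of the commit: it evicts the
// given pods one by one and stops early once the per-node limit is reported.
func evictAll(ctx context.Context, e evictor, pods []*v1.Pod) {
loop:
	for _, pod := range pods {
		err := e.Evict(ctx, pod, evictions.EvictOptions{StrategyName: "ExamplePlugin"})
		if err == nil {
			continue // pod was evicted
		}
		switch err.(type) {
		case *evictions.EvictionNodeLimitError:
			// Per-node eviction limit reached: no further eviction on this
			// node can succeed in this descheduling cycle.
			break loop
		default:
			klog.Errorf("eviction failed: %v", err)
		}
	}
}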
Jan Chaloupka
2024-07-06 20:02:55 +02:00
parent 7657345079
commit 18d0e4a540
17 changed files with 182 additions and 104 deletions

View File

@@ -64,14 +64,10 @@ func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool {
}
// Evict evicts a pod (no pre-check performed)
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
return ei.podEvictor.EvictPod(ctx, pod, opts)
}
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
return ei.podEvictor.NodeLimitExceeded(node)
}
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface

View File

@@ -0,0 +1,33 @@
package evictions
type EvictionNodeLimitError struct {
node string
}
func (e EvictionNodeLimitError) Error() string {
return "maximum number of evicted pods per node reached"
}
func NewEvictionNodeLimitError(node string) *EvictionNodeLimitError {
return &EvictionNodeLimitError{
node: node,
}
}
var _ error = &EvictionNodeLimitError{}
type EvictionNamespaceLimitError struct {
namespace string
}
func (e EvictionNamespaceLimitError) Error() string {
return "maximum number of evicted pods per namespace reached"
}
func NewEvictionNamespaceLimitError(namespace string) *EvictionNamespaceLimitError {
return &EvictionNamespaceLimitError{
namespace: namespace,
}
}
var _ error = &EvictionNamespaceLimitError{}
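
Since both limit errors are exported, a caller can tell them apart. A small sketch (again illustrative only, assuming the error values are returned unwrapped, as EvictPod does below; errors.As would also tolerate later wrapping):

package example

import (
	"errors"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

// classifyEvictionError is an illustrative helper, not part of the commit:
// it reports which eviction limit, if any, the returned error represents.
func classifyEvictionError(err error) string {
	var nodeLimit *evictions.EvictionNodeLimitError
	var nsLimit *evictions.EvictionNamespaceLimitError
	switch {
	case err == nil:
		return "evicted"
	case errors.As(err, &nodeLimit):
		return "per-node eviction limit reached"
	case errors.As(err, &nsLimit):
		return "per-namespace eviction limit reached"
	default:
		return "eviction API error"
	}
}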

View File

@@ -89,14 +89,6 @@ func (pe *PodEvictor) TotalEvicted() uint {
return total
}
// NodeLimitExceeded checks if the number of evictions for a node was exceeded
func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
if pe.maxPodsToEvictPerNode != nil {
return pe.nodepodCount[node.Name] == *pe.maxPodsToEvictPerNode
}
return false
}
func (pe *PodEvictor) ResetCounters() {
pe.nodepodCount = make(nodePodEvictedCount)
pe.namespacePodCount = make(namespacePodEvictCount)
@@ -118,29 +110,31 @@ type EvictOptions struct {
// EvictPod evicts a pod while exercising eviction limits.
// Returns true when the pod is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) bool {
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) error {
var span trace.Span
ctx, span = tracing.Tracer().Start(ctx, "EvictPod", trace.WithAttributes(attribute.String("podName", pod.Name), attribute.String("podNamespace", pod.Namespace), attribute.String("reason", opts.Reason), attribute.String("operation", tracing.EvictOperation)))
defer span.End()
if pod.Spec.NodeName != "" {
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[pod.Spec.NodeName]+1 > *pe.maxPodsToEvictPerNode {
err := NewEvictionNodeLimitError(pod.Spec.NodeName)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
metrics.PodsEvicted.With(map[string]string{"result": err.Error(), "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", "Maximum number of evicted pods per node reached")))
klog.ErrorS(fmt.Errorf("maximum number of evicted pods per node reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
return false
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
return err
}
}
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
err := NewEvictionNamespaceLimitError(pod.Namespace)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
metrics.PodsEvicted.With(map[string]string{"result": err.Error(), "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", "Maximum number of evicted pods per namespace reached")))
klog.ErrorS(fmt.Errorf("maximum number of evicted pods per namespace reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return false
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return err
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
@@ -151,7 +145,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
return false
return err
}
if pod.Spec.NodeName != "" {
@@ -176,7 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return true
return nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {

View File

@@ -141,12 +141,8 @@ func TestNewPodEvictor(t *testing.T) {
t.Errorf("Expected 0 total evictions, got %q instead", evictions)
}
if podEvictor.NodeLimitExceeded(stubNode) {
t.Errorf("Expected NodeLimitExceeded to return false, got true instead")
}
if !podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}) {
t.Errorf("Expected a pod eviction, got no eviction instead")
if err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}); err != nil {
t.Errorf("Expected a pod eviction, got an eviction error instead: %v", err)
}
// 1 node eviction expected
@@ -157,7 +153,15 @@ func TestNewPodEvictor(t *testing.T) {
if evictions := podEvictor.TotalEvicted(); evictions != 1 {
t.Errorf("Expected 1 total evictions, got %q instead", evictions)
}
if !podEvictor.NodeLimitExceeded(stubNode) {
t.Errorf("Expected NodeLimitExceeded to return true, got false instead")
err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{})
if err == nil {
t.Errorf("Expected a pod eviction error, got nil instead")
}
switch err.(type) {
case *EvictionNodeLimitError:
// all good
default:
t.Errorf("Expected a pod eviction EvictionNodeLimitError error, got a different error instead: %v", err)
}
}

View File

@@ -46,10 +46,6 @@ func (hi *HandleImpl) PreEvictionFilter(pod *v1.Pod) bool {
return hi.EvictorFilterImpl.PreEvictionFilter(pod)
}
func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
return hi.PodEvictorImpl.EvictPod(ctx, pod, opts)
}
func (hi *HandleImpl) NodeLimitExceeded(node *v1.Node) bool {
return hi.PodEvictorImpl.NodeLimitExceeded(node)
}

View File

@@ -311,42 +311,48 @@ func evictPods(
continue
}
if preEvictionFilterWithOptions(pod) {
if podEvictor.Evict(ctx, pod, evictOptions) {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
if !preEvictionFilterWithOptions(pod) {
continue
}
err = podEvictor.Evict(ctx, pod, evictOptions)
if err == nil {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}
keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}
keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
}
continue
}
if podEvictor.NodeLimitExceeded(nodeInfo.node) {
switch err.(type) {
case *evictions.EvictionNodeLimitError:
return
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -131,9 +131,17 @@ func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framewo
// in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction
podutil.SortPodsBasedOnAge(podsToEvict)
loop:
for _, pod := range podsToEvict {
if !d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}

View File

@@ -210,9 +210,15 @@ func (r *RemoveDuplicates) Balance(ctx context.Context, nodes []*v1.Node) *frame
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if r.handle.Evictor().NodeLimitExceeded(nodeMap[nodeName]) {
err := r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -102,10 +102,17 @@ func (d *RemoveFailedPods) Deschedule(ctx context.Context, nodes []*v1.Node) *fr
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -122,10 +122,17 @@ func (d *RemovePodsHavingTooManyRestarts) Deschedule(ctx context.Context, nodes
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -98,7 +98,8 @@ loop:
for i := 0; i < totalPods; i++ {
if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update allPods.
@@ -106,12 +107,16 @@ loop:
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}
if d.handle.Evictor().NodeLimitExceeded(node) {
continue loop
}
}
}
return nil

View File

@@ -134,11 +134,18 @@ func (d *RemovePodsViolatingNodeAffinity) processNodes(ctx context.Context, node
}
}
loop:
for _, pod := range pods {
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -114,6 +114,7 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
if !utils.TolerationsTolerateTaintsWithFilter(
pods[i].Spec.Tolerations,
@@ -121,9 +122,15 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
d.taintFilterFnc,
) {
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -235,10 +235,16 @@ func (d *RemovePodsViolatingTopologySpreadConstraint) Balance(ctx context.Contex
}
if d.handle.Evictor().PreEvictionFilter(pod) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
}
if d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
nodeLimitExceeded[pod.Spec.NodeName] = true
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
nodeLimitExceeded[pod.Spec.NodeName] = true
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -59,15 +59,11 @@ func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool {
}
// Evict evicts a pod (no pre-check performed)
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
opts.ProfileName = ei.profileName
return ei.podEvictor.EvictPod(ctx, pod, opts)
}
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
return ei.podEvictor.NodeLimitExceeded(node)
}
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface

View File

@@ -185,10 +185,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
if test.extensionPoint == frameworktypes.DescheduleExtensionPoint {
fakePlugin.AddReactor(string(frameworktypes.DescheduleExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
if dAction, ok := action.(fakeplugin.DescheduleAction); ok {
if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
if err == nil {
return true, false, nil
}
return true, false, fmt.Errorf("pod not evicted")
return true, false, fmt.Errorf("pod not evicted: %v", err)
}
return false, false, nil
})
@@ -196,10 +197,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
if test.extensionPoint == frameworktypes.BalanceExtensionPoint {
fakePlugin.AddReactor(string(frameworktypes.BalanceExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
if dAction, ok := action.(fakeplugin.BalanceAction); ok {
if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
if err == nil {
return true, false, nil
}
return true, false, fmt.Errorf("pod not evicted")
return true, false, fmt.Errorf("pod not evicted: %v", err)
}
return false, false, nil
})

View File

@@ -46,9 +46,7 @@ type Evictor interface {
// PreEvictionFilter checks if pod can be evicted right before eviction
PreEvictionFilter(*v1.Pod) bool
// Evict evicts a pod (no pre-check performed)
Evict(context.Context, *v1.Pod, evictions.EvictOptions) bool
// NodeLimitExceeded checks if the number of evictions for a node was exceeded
NodeLimitExceeded(node *v1.Node) bool
Evict(context.Context, *v1.Pod, evictions.EvictOptions) error
}
// Status describes result of an extension point invocation