1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #1456 from ingvagabund/limit-exceeded-to-error

PodEvictor: turn an exceeded limit into an error
This commit is contained in:
Kubernetes Prow Robot
2024-07-08 06:25:06 -07:00
committed by GitHub
17 changed files with 182 additions and 104 deletions

View File

@@ -64,14 +64,10 @@ func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool {
}
// Evict evicts a pod (no pre-check performed)
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
return ei.podEvictor.EvictPod(ctx, pod, opts)
}
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
return ei.podEvictor.NodeLimitExceeded(node)
}
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface

View File

@@ -0,0 +1,33 @@
package evictions
type EvictionNodeLimitError struct {
node string
}
func (e EvictionNodeLimitError) Error() string {
return "maximum number of evicted pods per node reached"
}
func NewEvictionNodeLimitError(node string) *EvictionNodeLimitError {
return &EvictionNodeLimitError{
node: node,
}
}
var _ error = &EvictionNodeLimitError{}
type EvictionNamespaceLimitError struct {
namespace string
}
func (e EvictionNamespaceLimitError) Error() string {
return "maximum number of evicted pods per namespace reached"
}
func NewEvictionNamespaceLimitError(namespace string) *EvictionNamespaceLimitError {
return &EvictionNamespaceLimitError{
namespace: namespace,
}
}
var _ error = &EvictionNamespaceLimitError{}

View File

@@ -89,14 +89,6 @@ func (pe *PodEvictor) TotalEvicted() uint {
return total
}
// NodeLimitExceeded checks if the number of evictions for a node was exceeded
func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
if pe.maxPodsToEvictPerNode != nil {
return pe.nodepodCount[node.Name] == *pe.maxPodsToEvictPerNode
}
return false
}
func (pe *PodEvictor) ResetCounters() {
pe.nodepodCount = make(nodePodEvictedCount)
pe.namespacePodCount = make(namespacePodEvictCount)
@@ -118,29 +110,31 @@ type EvictOptions struct {
// EvictPod evicts a pod while exercising eviction limits.
// Returns true when the pod is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) bool {
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) error {
var span trace.Span
ctx, span = tracing.Tracer().Start(ctx, "EvictPod", trace.WithAttributes(attribute.String("podName", pod.Name), attribute.String("podNamespace", pod.Namespace), attribute.String("reason", opts.Reason), attribute.String("operation", tracing.EvictOperation)))
defer span.End()
if pod.Spec.NodeName != "" {
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[pod.Spec.NodeName]+1 > *pe.maxPodsToEvictPerNode {
err := NewEvictionNodeLimitError(pod.Spec.NodeName)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
metrics.PodsEvicted.With(map[string]string{"result": err.Error(), "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", "Maximum number of evicted pods per node reached")))
klog.ErrorS(fmt.Errorf("maximum number of evicted pods per node reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
return false
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
return err
}
}
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
err := NewEvictionNamespaceLimitError(pod.Namespace)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
metrics.PodsEvicted.With(map[string]string{"result": err.Error(), "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", "Maximum number of evicted pods per namespace reached")))
klog.ErrorS(fmt.Errorf("maximum number of evicted pods per namespace reached"), "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return false
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return err
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
@@ -151,7 +145,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
return false
return err
}
if pod.Spec.NodeName != "" {
@@ -176,7 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return true
return nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {

View File

@@ -141,12 +141,8 @@ func TestNewPodEvictor(t *testing.T) {
t.Errorf("Expected 0 total evictions, got %q instead", evictions)
}
if podEvictor.NodeLimitExceeded(stubNode) {
t.Errorf("Expected NodeLimitExceeded to return false, got true instead")
}
if !podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}) {
t.Errorf("Expected a pod eviction, got no eviction instead")
if err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}); err != nil {
t.Errorf("Expected a pod eviction, got an eviction error instead: %v", err)
}
// 1 node eviction expected
@@ -157,7 +153,15 @@ func TestNewPodEvictor(t *testing.T) {
if evictions := podEvictor.TotalEvicted(); evictions != 1 {
t.Errorf("Expected 1 total evictions, got %q instead", evictions)
}
if !podEvictor.NodeLimitExceeded(stubNode) {
t.Errorf("Expected NodeLimitExceeded to return true, got false instead")
err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{})
if err == nil {
t.Errorf("Expected a pod eviction error, got nil instead")
}
switch err.(type) {
case *EvictionNodeLimitError:
// all good
default:
t.Errorf("Expected a pod eviction EvictionNodeLimitError error, got a different error instead: %v", err)
}
}

View File

@@ -46,10 +46,6 @@ func (hi *HandleImpl) PreEvictionFilter(pod *v1.Pod) bool {
return hi.EvictorFilterImpl.PreEvictionFilter(pod)
}
func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
return hi.PodEvictorImpl.EvictPod(ctx, pod, opts)
}
func (hi *HandleImpl) NodeLimitExceeded(node *v1.Node) bool {
return hi.PodEvictorImpl.NodeLimitExceeded(node)
}

View File

@@ -311,42 +311,48 @@ func evictPods(
continue
}
if preEvictionFilterWithOptions(pod) {
if podEvictor.Evict(ctx, pod, evictOptions) {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
if !preEvictionFilterWithOptions(pod) {
continue
}
err = podEvictor.Evict(ctx, pod, evictOptions)
if err == nil {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}
keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeInfo.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}
keysAndValues := []interface{}{
"node", nodeInfo.node.Name,
"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !nodeutil.IsBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeInfo, totalAvailableUsage) {
break
}
continue
}
if podEvictor.NodeLimitExceeded(nodeInfo.node) {
switch err.(type) {
case *evictions.EvictionNodeLimitError:
return
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -131,9 +131,17 @@ func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framewo
// in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction
podutil.SortPodsBasedOnAge(podsToEvict)
loop:
for _, pod := range podsToEvict {
if !d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}

View File

@@ -210,9 +210,15 @@ func (r *RemoveDuplicates) Balance(ctx context.Context, nodes []*v1.Node) *frame
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if r.handle.Evictor().NodeLimitExceeded(nodeMap[nodeName]) {
err := r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -102,10 +102,17 @@ func (d *RemoveFailedPods) Deschedule(ctx context.Context, nodes []*v1.Node) *fr
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -122,10 +122,17 @@ func (d *RemovePodsHavingTooManyRestarts) Deschedule(ctx context.Context, nodes
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -98,7 +98,8 @@ loop:
for i := 0; i < totalPods; i++ {
if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update allPods.
@@ -106,12 +107,16 @@ loop:
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
continue loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}
if d.handle.Evictor().NodeLimitExceeded(node) {
continue loop
}
}
}
return nil

View File

@@ -134,11 +134,18 @@ func (d *RemovePodsViolatingNodeAffinity) processNodes(ctx context.Context, node
}
}
loop:
for _, pod := range pods {
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -114,6 +114,7 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
}
}
totalPods := len(pods)
loop:
for i := 0; i < totalPods; i++ {
if !utils.TolerationsTolerateTaintsWithFilter(
pods[i].Spec.Tolerations,
@@ -121,9 +122,15 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
d.taintFilterFnc,
) {
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if d.handle.Evictor().NodeLimitExceeded(node) {
break
err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
break loop
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -235,10 +235,16 @@ func (d *RemovePodsViolatingTopologySpreadConstraint) Balance(ctx context.Contex
}
if d.handle.Evictor().PreEvictionFilter(pod) {
d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
}
if d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
nodeLimitExceeded[pod.Spec.NodeName] = true
err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
if err == nil {
continue
}
switch err.(type) {
case *evictions.EvictionNodeLimitError:
nodeLimitExceeded[pod.Spec.NodeName] = true
default:
klog.Errorf("eviction failed: %v", err)
}
}
}

View File

@@ -59,15 +59,11 @@ func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool {
}
// Evict evicts a pod (no pre-check performed)
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
opts.ProfileName = ei.profileName
return ei.podEvictor.EvictPod(ctx, pod, opts)
}
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
return ei.podEvictor.NodeLimitExceeded(node)
}
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface

View File

@@ -185,10 +185,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
if test.extensionPoint == frameworktypes.DescheduleExtensionPoint {
fakePlugin.AddReactor(string(frameworktypes.DescheduleExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
if dAction, ok := action.(fakeplugin.DescheduleAction); ok {
if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
if err == nil {
return true, false, nil
}
return true, false, fmt.Errorf("pod not evicted")
return true, false, fmt.Errorf("pod not evicted: %v", err)
}
return false, false, nil
})
@@ -196,10 +197,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
if test.extensionPoint == frameworktypes.BalanceExtensionPoint {
fakePlugin.AddReactor(string(frameworktypes.BalanceExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
if dAction, ok := action.(fakeplugin.BalanceAction); ok {
if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
if err == nil {
return true, false, nil
}
return true, false, fmt.Errorf("pod not evicted")
return true, false, fmt.Errorf("pod not evicted: %v", err)
}
return false, false, nil
})

View File

@@ -46,9 +46,7 @@ type Evictor interface {
// PreEvictionFilter checks if pod can be evicted right before eviction
PreEvictionFilter(*v1.Pod) bool
// Evict evicts a pod (no pre-check performed)
Evict(context.Context, *v1.Pod, evictions.EvictOptions) bool
// NodeLimitExceeded checks if the number of evictions for a node was exceeded
NodeLimitExceeded(node *v1.Node) bool
Evict(context.Context, *v1.Pod, evictions.EvictOptions) error
}
// Status describes result of an extension point invocation