Mirror of https://github.com/kubernetes-sigs/descheduler.git
PodEvictor: turn an exceeded limit into an error
Previously, Evict reported only a boolean and callers had to poll NodeLimitExceeded separately, so an eviction blocked by a node limit never surfaced as a failure and was invisible to metric reporting. Returning an error instead makes these failures observable and lets plugins react to other exceeded limits as well, e.g. the limit on the number of pods evicted per namespace.
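Before the per-file changes, a minimal self-contained sketch of the new calling convention. The EvictionNodeLimitError and evict stand-ins below only mirror the shape of the descheduler types referenced in the diff; they are not the real definitions:

package main

import (
	"context"
	"fmt"
)

// EvictionNodeLimitError is a stand-in for the typed error this commit has
// Evict return when a node's eviction limit is hit; the real type lives in
// the descheduler's evictions package.
type EvictionNodeLimitError struct{ Node string }

func (e *EvictionNodeLimitError) Error() string {
	return fmt.Sprintf("node %q eviction limit exceeded", e.Node)
}

// evict mocks the new Evictor.Evict contract: nil on success, a typed error
// when a limit is reached, any other error on an ordinary failure.
func evict(ctx context.Context, pod string) error {
	if pod == "pod-c" {
		return &EvictionNodeLimitError{Node: "node-1"}
	}
	return nil
}

func main() {
	for _, pod := range []string{"pod-a", "pod-b", "pod-c"} {
		err := evict(context.TODO(), pod)
		if err == nil {
			fmt.Println("evicted", pod)
			continue
		}
		// The type switch replaces the old bool-plus-NodeLimitExceeded pair:
		// one return value now says both "did it fail" and "why".
		switch err.(type) {
		case *EvictionNodeLimitError:
			fmt.Println("node limit reached, stopping:", err)
			return
		default:
			fmt.Println("eviction failed:", err)
		}
	}
}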
@@ -46,10 +46,6 @@ func (hi *HandleImpl) PreEvictionFilter(pod *v1.Pod) bool {
 	return hi.EvictorFilterImpl.PreEvictionFilter(pod)
 }
 
-func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
+func (hi *HandleImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
 	return hi.PodEvictorImpl.EvictPod(ctx, pod, opts)
 }
-
-func (hi *HandleImpl) NodeLimitExceeded(node *v1.Node) bool {
-	return hi.PodEvictorImpl.NodeLimitExceeded(node)
-}
@@ -311,42 +311,48 @@ func evictPods(
 			continue
 		}
 
-		if preEvictionFilterWithOptions(pod) {
-			if podEvictor.Evict(ctx, pod, evictOptions) {
-				klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
+		if !preEvictionFilterWithOptions(pod) {
+			continue
+		}
+		err = podEvictor.Evict(ctx, pod, evictOptions)
+		if err == nil {
+			klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod))
 
-				for name := range totalAvailableUsage {
-					if name == v1.ResourcePods {
-						nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
-						totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
-					} else {
-						quantity := utils.GetResourceRequestQuantity(pod, name)
-						nodeInfo.usage[name].Sub(quantity)
-						totalAvailableUsage[name].Sub(quantity)
-					}
-				}
+			for name := range totalAvailableUsage {
+				if name == v1.ResourcePods {
+					nodeInfo.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
+					totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
+				} else {
+					quantity := utils.GetResourceRequestQuantity(pod, name)
+					nodeInfo.usage[name].Sub(quantity)
+					totalAvailableUsage[name].Sub(quantity)
+				}
+			}
 
-				keysAndValues := []interface{}{
-					"node", nodeInfo.node.Name,
-					"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
-					"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
-					"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
-				}
-				for name := range totalAvailableUsage {
-					if !nodeutil.IsBasicResource(name) {
-						keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
-					}
-				}
+			keysAndValues := []interface{}{
+				"node", nodeInfo.node.Name,
+				"CPU", nodeInfo.usage[v1.ResourceCPU].MilliValue(),
+				"Mem", nodeInfo.usage[v1.ResourceMemory].Value(),
+				"Pods", nodeInfo.usage[v1.ResourcePods].Value(),
+			}
+			for name := range totalAvailableUsage {
+				if !nodeutil.IsBasicResource(name) {
+					keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
+				}
+			}
 
-				klog.V(3).InfoS("Updated node usage", keysAndValues...)
-				// check if pods can be still evicted
-				if !continueEviction(nodeInfo, totalAvailableUsage) {
-					break
-				}
-			}
-		}
-		if podEvictor.NodeLimitExceeded(nodeInfo.node) {
-			return
-		}
+			klog.V(3).InfoS("Updated node usage", keysAndValues...)
+			// check if pods can be still evicted
+			if !continueEviction(nodeInfo, totalAvailableUsage) {
+				break
+			}
+			continue
+		}
+		switch err.(type) {
+		case *evictions.EvictionNodeLimitError:
+			return
+		default:
+			klog.Errorf("eviction failed: %v", err)
+		}
 	}
 }
@@ -131,9 +131,17 @@ func (d *PodLifeTime) Deschedule(ctx context.Context, nodes []*v1.Node) *framewo
 	// in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction
 	podutil.SortPodsBasedOnAge(podsToEvict)
 
+loop:
 	for _, pod := range podsToEvict {
-		if !d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
-			d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
+		err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
+		if err == nil {
+			continue
+		}
+		switch err.(type) {
+		case *evictions.EvictionNodeLimitError:
+			continue loop
+		default:
+			klog.Errorf("eviction failed: %v", err)
 		}
 	}
 
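A note on the loop: labels introduced here and in the hunks that follow. In Go, a bare break inside a switch terminates the switch, not the enclosing for loop, so leaving the loop from within the error type switch requires a label; the commit labels the loops and uses break loop (and, for symmetry, continue loop). A toy illustration, not descheduler code:

package main

import "fmt"

func main() {
loop:
	for i := 0; i < 5; i++ {
		switch {
		case i == 2:
			// A bare "break" here would only exit the switch;
			// the label is needed to leave the for loop itself.
			break loop
		default:
			fmt.Println(i)
		}
	}
	fmt.Println("done") // prints 0, 1, done
}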
@@ -210,9 +210,15 @@ func (r *RemoveDuplicates) Balance(ctx context.Context, nodes []*v1.Node) *frame
 			// It's assumed all duplicated pods are in the same priority class
 			// TODO(jchaloup): check if the pod has a different node to lend to
 			for _, pod := range pods[upperAvg-1:] {
-				r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
-				if r.handle.Evictor().NodeLimitExceeded(nodeMap[nodeName]) {
+				err := r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
+				if err == nil {
+					continue
+				}
+				switch err.(type) {
+				case *evictions.EvictionNodeLimitError:
 					continue loop
+				default:
+					klog.Errorf("eviction failed: %v", err)
 				}
 			}
 		}
@@ -102,10 +102,17 @@ func (d *RemoveFailedPods) Deschedule(ctx context.Context, nodes []*v1.Node) *fr
 		}
 	}
 	totalPods := len(pods)
+loop:
 	for i := 0; i < totalPods; i++ {
-		d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
-		if d.handle.Evictor().NodeLimitExceeded(node) {
-			break
+		err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
+		if err == nil {
+			continue
+		}
+		switch err.(type) {
+		case *evictions.EvictionNodeLimitError:
+			break loop
+		default:
+			klog.Errorf("eviction failed: %v", err)
 		}
 	}
 }
@@ -122,10 +122,17 @@ func (d *RemovePodsHavingTooManyRestarts) Deschedule(ctx context.Context, nodes
 		}
 	}
 	totalPods := len(pods)
+loop:
 	for i := 0; i < totalPods; i++ {
-		d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
-		if d.handle.Evictor().NodeLimitExceeded(node) {
-			break
+		err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
+		if err == nil {
+			continue
+		}
+		switch err.(type) {
+		case *evictions.EvictionNodeLimitError:
+			break loop
+		default:
+			klog.Errorf("eviction failed: %v", err)
 		}
 	}
 }
@@ -98,7 +98,8 @@ loop:
 	for i := 0; i < totalPods; i++ {
 		if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
 			if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
-				if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
+				err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
+				if err == nil {
 					// Since the current pod is evicted all other pods which have anti-affinity with this
 					// pod need not be evicted.
 					// Update allPods.

@@ -106,12 +107,16 @@ loop:
 					pods = append(pods[:i], pods[i+1:]...)
 					i--
 					totalPods--
+					continue
 				}
-			}
-			if d.handle.Evictor().NodeLimitExceeded(node) {
-				continue loop
+				switch err.(type) {
+				case *evictions.EvictionNodeLimitError:
+					continue loop
+				default:
+					klog.Errorf("eviction failed: %v", err)
+				}
 			}
 		}
 	}
 
 	return nil
@@ -134,11 +134,18 @@ func (d *RemovePodsViolatingNodeAffinity) processNodes(ctx context.Context, node
 		}
 	}
 
+loop:
 	for _, pod := range pods {
 		klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
-		d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
-		if d.handle.Evictor().NodeLimitExceeded(node) {
-			break
+		err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
+		if err == nil {
+			continue
+		}
+		switch err.(type) {
+		case *evictions.EvictionNodeLimitError:
+			break loop
+		default:
+			klog.Errorf("eviction failed: %v", err)
 		}
 	}
 }
@@ -114,6 +114,7 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
 		}
 	}
 	totalPods := len(pods)
+loop:
 	for i := 0; i < totalPods; i++ {
 		if !utils.TolerationsTolerateTaintsWithFilter(
 			pods[i].Spec.Tolerations,

@@ -121,9 +122,15 @@ func (d *RemovePodsViolatingNodeTaints) Deschedule(ctx context.Context, nodes []
 			d.taintFilterFnc,
 		) {
 			klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
-			d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
-			if d.handle.Evictor().NodeLimitExceeded(node) {
-				break
+			err := d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName})
+			if err == nil {
+				continue
+			}
+			switch err.(type) {
+			case *evictions.EvictionNodeLimitError:
+				break loop
+			default:
+				klog.Errorf("eviction failed: %v", err)
 			}
 		}
 	}
@@ -235,10 +235,16 @@ func (d *RemovePodsViolatingTopologySpreadConstraint) Balance(ctx context.Contex
 			}
 
 			if d.handle.Evictor().PreEvictionFilter(pod) {
-				d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
-			}
-			if d.handle.Evictor().NodeLimitExceeded(nodeMap[pod.Spec.NodeName]) {
-				nodeLimitExceeded[pod.Spec.NodeName] = true
+				err := d.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{StrategyName: PluginName})
+				if err == nil {
+					continue
+				}
+				switch err.(type) {
+				case *evictions.EvictionNodeLimitError:
+					nodeLimitExceeded[pod.Spec.NodeName] = true
+				default:
+					klog.Errorf("eviction failed: %v", err)
+				}
 			}
 		}
 	}
@@ -59,15 +59,11 @@ func (ei *evictorImpl) PreEvictionFilter(pod *v1.Pod) bool {
 }
 
 // Evict evicts a pod (no pre-check performed)
-func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
+func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) error {
 	opts.ProfileName = ei.profileName
 	return ei.podEvictor.EvictPod(ctx, pod, opts)
 }
 
-func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
-	return ei.podEvictor.NodeLimitExceeded(node)
-}
-
 // handleImpl implements the framework handle which gets passed to plugins
 type handleImpl struct {
 	clientSet clientset.Interface
@@ -185,10 +185,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
 			if test.extensionPoint == frameworktypes.DescheduleExtensionPoint {
 				fakePlugin.AddReactor(string(frameworktypes.DescheduleExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
 					if dAction, ok := action.(fakeplugin.DescheduleAction); ok {
-						if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
+						err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
+						if err == nil {
 							return true, false, nil
 						}
-						return true, false, fmt.Errorf("pod not evicted")
+						return true, false, fmt.Errorf("pod not evicted: %v", err)
 					}
 					return false, false, nil
 				})
@@ -196,10 +197,11 @@ func TestProfileDescheduleBalanceExtensionPointsEviction(t *testing.T) {
 			if test.extensionPoint == frameworktypes.BalanceExtensionPoint {
 				fakePlugin.AddReactor(string(frameworktypes.BalanceExtensionPoint), func(action fakeplugin.Action) (handled, filter bool, err error) {
 					if dAction, ok := action.(fakeplugin.BalanceAction); ok {
-						if dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName}) {
+						err := dAction.Handle().Evictor().Evict(ctx, p1, evictions.EvictOptions{StrategyName: fakePlugin.PluginName})
+						if err == nil {
 							return true, false, nil
 						}
-						return true, false, fmt.Errorf("pod not evicted")
+						return true, false, fmt.Errorf("pod not evicted: %v", err)
 					}
 					return false, false, nil
 				})
@@ -46,9 +46,7 @@ type Evictor interface {
 	// PreEvictionFilter checks if pod can be evicted right before eviction
 	PreEvictionFilter(*v1.Pod) bool
 	// Evict evicts a pod (no pre-check performed)
-	Evict(context.Context, *v1.Pod, evictions.EvictOptions) bool
-	// NodeLimitExceeded checks if the number of evictions for a node was exceeded
-	NodeLimitExceeded(node *v1.Node) bool
+	Evict(context.Context, *v1.Pod, evictions.EvictOptions) error
 }
 
 // Status describes result of an extension point invocation
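With NodeLimitExceeded gone from the Evictor interface, the err == nil / type-switch block repeats near-verbatim in every plugin above; a small shared helper would be one way to consolidate it. A sketch only, not part of this commit, assuming evictions.EvictionNodeLimitError is the exported type the diff references:

package plugins // hypothetical shared location; each plugin currently inlines this switch

import (
	"k8s.io/klog/v2"

	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
)

// stopForNode reports whether an Evict error means the node's eviction
// limit was reached, so the caller should stop evicting there; any other
// non-nil error is logged and eviction continues.
func stopForNode(err error) bool {
	if err == nil {
		return false
	}
	switch err.(type) {
	case *evictions.EvictionNodeLimitError:
		return true
	default:
		klog.Errorf("eviction failed: %v", err)
		return false
	}
}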