1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

Add RemovePodsHavingTooManyRestarts strategy

This commit is contained in:
Mike Dame
2020-04-01 16:10:56 -04:00
parent 3a8dfc07ed
commit e7c42794a0
11 changed files with 377 additions and 0 deletions

View File

@@ -48,6 +48,7 @@ type DeschedulerStrategy struct {
type StrategyParameters struct {
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds
NodeAffinityType []string
PodsHavingTooManyRestarts PodsHavingTooManyRestarts
}
type Percentage float64
@@ -58,3 +59,8 @@ type NodeResourceUtilizationThresholds struct {
TargetThresholds ResourceThresholds
NumberOfNodes int
}
type PodsHavingTooManyRestarts struct {
PodRestartThreshold int32
IncludingInitContainers bool
}

View File

@@ -48,6 +48,7 @@ type DeschedulerStrategy struct {
type StrategyParameters struct {
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
PodsHavingTooManyRestarts PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
}
type Percentage float64
@@ -58,3 +59,8 @@ type NodeResourceUtilizationThresholds struct {
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
}
type PodsHavingTooManyRestarts struct {
PodRestartThreshold int32 `json:"podRestartThreshold,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
}

View File

@@ -65,6 +65,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.PodsHavingTooManyRestarts)(nil), (*PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(a.(*api.PodsHavingTooManyRestarts), b.(*PodsHavingTooManyRestarts), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*StrategyParameters)(nil), (*api.StrategyParameters)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_StrategyParameters_To_api_StrategyParameters(a.(*StrategyParameters), b.(*api.StrategyParameters), scope)
}); err != nil {
@@ -150,11 +160,36 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
}
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
out.PodRestartThreshold = in.PodRestartThreshold
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts is an autogenerated conversion function.
func Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
return autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in, out, s)
}
func autoConvert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in *api.PodsHavingTooManyRestarts, out *PodsHavingTooManyRestarts, s conversion.Scope) error {
out.PodRestartThreshold = in.PodRestartThreshold
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts is an autogenerated conversion function.
func Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in *api.PodsHavingTooManyRestarts, out *PodsHavingTooManyRestarts, s conversion.Scope) error {
return autoConvert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in, out, s)
}
func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *StrategyParameters, out *api.StrategyParameters, s conversion.Scope) error {
if err := Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(&in.NodeResourceUtilizationThresholds, &out.NodeResourceUtilizationThresholds, s); err != nil {
return err
}
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
if err := Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(&in.PodsHavingTooManyRestarts, &out.PodsHavingTooManyRestarts, s); err != nil {
return err
}
return nil
}
@@ -168,6 +203,9 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
return err
}
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
if err := Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(&in.PodsHavingTooManyRestarts, &out.PodsHavingTooManyRestarts, s); err != nil {
return err
}
return nil
}

View File

@@ -103,6 +103,22 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodsHavingTooManyRestarts.
func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
if in == nil {
return nil
}
out := new(PodsHavingTooManyRestarts)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
{
@@ -156,6 +172,7 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = make([]string, len(*in))
copy(*out, *in)
}
out.PodsHavingTooManyRestarts = in.PodsHavingTooManyRestarts
return
}

View File

@@ -103,6 +103,22 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodsHavingTooManyRestarts.
func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
if in == nil {
return nil
}
out := new(PodsHavingTooManyRestarts)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
{
@@ -156,6 +172,7 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = make([]string, len(*in))
copy(*out, *in)
}
out.PodsHavingTooManyRestarts = in.PodsHavingTooManyRestarts
return
}

View File

@@ -90,6 +90,7 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy *
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], nodes, podEvictor)
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], nodes, podEvictor)
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], nodes, podEvictor)
strategies.RemovePodsHavingTooManyRestarts(rs, deschedulerPolicy.Strategies["RemovePodsHavingTooManyRestarts"], nodes, podEvictor)
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
if rs.DeschedulingInterval.Seconds() == 0 {

View File

@@ -0,0 +1,77 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"k8s.io/api/core/v1"
"k8s.io/klog"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)
// RemovePodsHavingTooManyRestarts removes the pods that have too many restarts on node.
// There are too many cases leading this issue: Volume mount failed, app error due to nodes' different settings.
// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
func RemovePodsHavingTooManyRestarts(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if !strategy.Enabled || strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold < 1 {
return
}
removePodsHavingTooManyRestarts(ds, strategy, nodes, podEvictor, ds.EvictLocalStoragePods)
}
func removePodsHavingTooManyRestarts(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, evictLocalStoragePods bool) {
for _, node := range nodes {
klog.V(1).Infof("Processing node: %s", node.Name)
pods, err := podutil.ListEvictablePodsOnNode(ds.Client, node, evictLocalStoragePods)
if err != nil {
klog.Errorf("Error when list pods at node %s", node.Name)
continue
}
for i, pod := range pods {
restarts, initRestarts := calcContainerRestarts(pod)
if strategy.Params.PodsHavingTooManyRestarts.IncludingInitContainers {
if restarts+initRestarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
continue
}
} else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
continue
}
if _, err := podEvictor.EvictPod(pods[i], node); err != nil {
break
}
}
}
}
// calcContainerRestarts get container restarts and init container restarts.
func calcContainerRestarts(pod *v1.Pod) (int32, int32) {
var restarts, initRestarts int32
for _, cs := range pod.Status.ContainerStatuses {
restarts += cs.RestartCount
}
for _, cs := range pod.Status.InitContainerStatuses {
initRestarts += cs.RestartCount
}
return restarts, initRestarts
}

View File

@@ -0,0 +1,191 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"testing"
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/test"
)
func initPods(node *v1.Node) []v1.Pod {
pods := make([]v1.Pod, 0)
for i := int32(0); i <= 9; i++ {
pod := test.BuildTestPod(fmt.Sprintf("pod-%d", i), 100, 0, node.Name, nil)
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// pod at index i will have 25 * i restarts.
pod.Status = v1.PodStatus{
InitContainerStatuses: []v1.ContainerStatus{
{
RestartCount: 5 * i,
},
},
ContainerStatuses: []v1.ContainerStatus{
{
RestartCount: 10 * i,
},
{
RestartCount: 10 * i,
},
},
}
pods = append(pods, *pod)
}
// The following 3 pods won't get evicted.
// A daemonset.
pods[6].ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A pod with local storage.
pods[7].ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pods[7].Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pods[8].Annotations = test.GetMirrorPodAnnotation()
return pods
}
func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32) api.DeschedulerStrategy {
return api.DeschedulerStrategy{
Enabled: enabled,
Params: api.StrategyParameters{
PodsHavingTooManyRestarts: api.PodsHavingTooManyRestarts{
PodRestartThreshold: restartThresholds,
IncludingInitContainers: includingInitContainers,
},
},
}
}
node := test.BuildTestNode("node1", 2000, 3000, 10, nil)
pods := initPods(node)
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: pods}, nil
})
tests := []struct {
description string
pods []v1.Pod
strategy api.DeschedulerStrategy
expectedEvictedPodCount int
maxPodsToEvict int
}{
{
description: "All pods have total restarts under threshold, no pod evictions",
strategy: createStrategy(true, true, 10000),
expectedEvictedPodCount: 0,
maxPodsToEvict: 0,
},
{
description: "Some pods have total restarts bigger than threshold",
strategy: createStrategy(true, true, 1),
expectedEvictedPodCount: 6,
maxPodsToEvict: 0,
},
{
description: "Nine pods have total restarts equals threshold(includingInitContainers=true), 6 pods evictions",
strategy: createStrategy(true, true, 1*25),
expectedEvictedPodCount: 6,
maxPodsToEvict: 0,
},
{
description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 5 pods evictions",
strategy: createStrategy(true, false, 1*25),
expectedEvictedPodCount: 5,
maxPodsToEvict: 0,
},
{
description: "All pods have total restarts equals threshold(includingInitContainers=true), 6 pods evictions",
strategy: createStrategy(true, true, 1*20),
expectedEvictedPodCount: 6,
maxPodsToEvict: 0,
},
{
description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 6 pods evictions",
strategy: createStrategy(true, false, 1*20),
expectedEvictedPodCount: 6,
maxPodsToEvict: 0,
},
{
description: "Five pods have total restarts bigger than threshold(includingInitContainers=true), but only 1 pod eviction",
strategy: createStrategy(true, true, 5*25+1),
expectedEvictedPodCount: 1,
maxPodsToEvict: 0,
},
{
description: "Five pods have total restarts bigger than threshold(includingInitContainers=false), but only 1 pod eviction",
strategy: createStrategy(true, false, 5*20+1),
expectedEvictedPodCount: 1,
maxPodsToEvict: 0,
},
{
description: "All pods have total restarts equals threshold(maxPodsToEvict=3), 3 pods evictions",
strategy: createStrategy(true, true, 1),
expectedEvictedPodCount: 3,
maxPodsToEvict: 3,
},
}
for _, tc := range tests {
ds := options.DeschedulerServer{
DeschedulerConfiguration: componentconfig.DeschedulerConfiguration{
MaxNoOfPodsToEvictPerNode: tc.maxPodsToEvict,
},
Client: fakeClient,
}
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
tc.maxPodsToEvict,
[]*v1.Node{node},
)
removePodsHavingTooManyRestarts(&ds, tc.strategy, []*v1.Node{node}, podEvictor, ds.EvictLocalStoragePods)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
}
}