mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 13:29:11 +01:00
Add RemovePodsHavingTooManyRestarts strategy
This commit is contained in:
@@ -48,6 +48,7 @@ type DeschedulerStrategy struct {
|
||||
type StrategyParameters struct {
|
||||
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds
|
||||
NodeAffinityType []string
|
||||
PodsHavingTooManyRestarts PodsHavingTooManyRestarts
|
||||
}
|
||||
|
||||
type Percentage float64
|
||||
@@ -58,3 +59,8 @@ type NodeResourceUtilizationThresholds struct {
|
||||
TargetThresholds ResourceThresholds
|
||||
NumberOfNodes int
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
PodRestartThreshold int32
|
||||
IncludingInitContainers bool
|
||||
}
|
||||
|
||||
@@ -48,6 +48,7 @@ type DeschedulerStrategy struct {
|
||||
type StrategyParameters struct {
|
||||
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
|
||||
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
|
||||
PodsHavingTooManyRestarts PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
|
||||
}
|
||||
|
||||
type Percentage float64
|
||||
@@ -58,3 +59,8 @@ type NodeResourceUtilizationThresholds struct {
|
||||
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
|
||||
NumberOfNodes int `json:"numberOfNodes,omitempty"`
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
PodRestartThreshold int32 `json:"podRestartThreshold,omitempty"`
|
||||
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
|
||||
}
|
||||
|
||||
@@ -65,6 +65,16 @@ func RegisterConversions(s *runtime.Scheme) error {
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*api.PodsHavingTooManyRestarts)(nil), (*PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(a.(*api.PodsHavingTooManyRestarts), b.(*PodsHavingTooManyRestarts), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*StrategyParameters)(nil), (*api.StrategyParameters)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_StrategyParameters_To_api_StrategyParameters(a.(*StrategyParameters), b.(*api.StrategyParameters), scope)
|
||||
}); err != nil {
|
||||
@@ -150,11 +160,36 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
|
||||
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
|
||||
out.PodRestartThreshold = in.PodRestartThreshold
|
||||
out.IncludingInitContainers = in.IncludingInitContainers
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts is an autogenerated conversion function.
|
||||
func Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
|
||||
return autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in *api.PodsHavingTooManyRestarts, out *PodsHavingTooManyRestarts, s conversion.Scope) error {
|
||||
out.PodRestartThreshold = in.PodRestartThreshold
|
||||
out.IncludingInitContainers = in.IncludingInitContainers
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts is an autogenerated conversion function.
|
||||
func Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in *api.PodsHavingTooManyRestarts, out *PodsHavingTooManyRestarts, s conversion.Scope) error {
|
||||
return autoConvert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *StrategyParameters, out *api.StrategyParameters, s conversion.Scope) error {
|
||||
if err := Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(&in.NodeResourceUtilizationThresholds, &out.NodeResourceUtilizationThresholds, s); err != nil {
|
||||
return err
|
||||
}
|
||||
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
|
||||
if err := Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(&in.PodsHavingTooManyRestarts, &out.PodsHavingTooManyRestarts, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -168,6 +203,9 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
|
||||
return err
|
||||
}
|
||||
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
|
||||
if err := Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(&in.PodsHavingTooManyRestarts, &out.PodsHavingTooManyRestarts, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -103,6 +103,22 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodsHavingTooManyRestarts.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PodsHavingTooManyRestarts)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
|
||||
{
|
||||
@@ -156,6 +172,7 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
out.PodsHavingTooManyRestarts = in.PodsHavingTooManyRestarts
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -103,6 +103,22 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodsHavingTooManyRestarts.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PodsHavingTooManyRestarts)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
|
||||
{
|
||||
@@ -156,6 +172,7 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
out.PodsHavingTooManyRestarts = in.PodsHavingTooManyRestarts
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -90,6 +90,7 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy *
|
||||
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], nodes, podEvictor)
|
||||
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], nodes, podEvictor)
|
||||
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], nodes, podEvictor)
|
||||
strategies.RemovePodsHavingTooManyRestarts(rs, deschedulerPolicy.Strategies["RemovePodsHavingTooManyRestarts"], nodes, podEvictor)
|
||||
|
||||
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
|
||||
if rs.DeschedulingInterval.Seconds() == 0 {
|
||||
|
||||
77
pkg/descheduler/strategies/toomanyrestarts.go
Normal file
77
pkg/descheduler/strategies/toomanyrestarts.go
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
)
|
||||
|
||||
// RemovePodsHavingTooManyRestarts removes the pods that have too many restarts on node.
|
||||
// There are too many cases leading this issue: Volume mount failed, app error due to nodes' different settings.
|
||||
// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
|
||||
func RemovePodsHavingTooManyRestarts(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
|
||||
if !strategy.Enabled || strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold < 1 {
|
||||
return
|
||||
}
|
||||
removePodsHavingTooManyRestarts(ds, strategy, nodes, podEvictor, ds.EvictLocalStoragePods)
|
||||
}
|
||||
|
||||
func removePodsHavingTooManyRestarts(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, evictLocalStoragePods bool) {
|
||||
for _, node := range nodes {
|
||||
klog.V(1).Infof("Processing node: %s", node.Name)
|
||||
pods, err := podutil.ListEvictablePodsOnNode(ds.Client, node, evictLocalStoragePods)
|
||||
if err != nil {
|
||||
klog.Errorf("Error when list pods at node %s", node.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
for i, pod := range pods {
|
||||
restarts, initRestarts := calcContainerRestarts(pod)
|
||||
if strategy.Params.PodsHavingTooManyRestarts.IncludingInitContainers {
|
||||
if restarts+initRestarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
|
||||
continue
|
||||
}
|
||||
} else if restarts < strategy.Params.PodsHavingTooManyRestarts.PodRestartThreshold {
|
||||
continue
|
||||
}
|
||||
if _, err := podEvictor.EvictPod(pods[i], node); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// calcContainerRestarts get container restarts and init container restarts.
|
||||
func calcContainerRestarts(pod *v1.Pod) (int32, int32) {
|
||||
var restarts, initRestarts int32
|
||||
|
||||
for _, cs := range pod.Status.ContainerStatuses {
|
||||
restarts += cs.RestartCount
|
||||
}
|
||||
|
||||
for _, cs := range pod.Status.InitContainerStatuses {
|
||||
initRestarts += cs.RestartCount
|
||||
}
|
||||
|
||||
return restarts, initRestarts
|
||||
}
|
||||
191
pkg/descheduler/strategies/toomanyrestarts_test.go
Normal file
191
pkg/descheduler/strategies/toomanyrestarts_test.go
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"fmt"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
func initPods(node *v1.Node) []v1.Pod {
|
||||
pods := make([]v1.Pod, 0)
|
||||
|
||||
for i := int32(0); i <= 9; i++ {
|
||||
pod := test.BuildTestPod(fmt.Sprintf("pod-%d", i), 100, 0, node.Name, nil)
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
// pod at index i will have 25 * i restarts.
|
||||
pod.Status = v1.PodStatus{
|
||||
InitContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 5 * i,
|
||||
},
|
||||
},
|
||||
ContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
RestartCount: 10 * i,
|
||||
},
|
||||
{
|
||||
RestartCount: 10 * i,
|
||||
},
|
||||
},
|
||||
}
|
||||
pods = append(pods, *pod)
|
||||
}
|
||||
|
||||
// The following 3 pods won't get evicted.
|
||||
// A daemonset.
|
||||
pods[6].ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
// A pod with local storage.
|
||||
pods[7].ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pods[7].Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pods[8].Annotations = test.GetMirrorPodAnnotation()
|
||||
|
||||
return pods
|
||||
}
|
||||
|
||||
func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
|
||||
createStrategy := func(enabled, includingInitContainers bool, restartThresholds int32) api.DeschedulerStrategy {
|
||||
return api.DeschedulerStrategy{
|
||||
Enabled: enabled,
|
||||
Params: api.StrategyParameters{
|
||||
PodsHavingTooManyRestarts: api.PodsHavingTooManyRestarts{
|
||||
PodRestartThreshold: restartThresholds,
|
||||
IncludingInitContainers: includingInitContainers,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
node := test.BuildTestNode("node1", 2000, 3000, 10, nil)
|
||||
pods := initPods(node)
|
||||
|
||||
fakeClient := &fake.Clientset{}
|
||||
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, &v1.PodList{Items: pods}, nil
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
description string
|
||||
pods []v1.Pod
|
||||
strategy api.DeschedulerStrategy
|
||||
expectedEvictedPodCount int
|
||||
maxPodsToEvict int
|
||||
}{
|
||||
{
|
||||
description: "All pods have total restarts under threshold, no pod evictions",
|
||||
strategy: createStrategy(true, true, 10000),
|
||||
expectedEvictedPodCount: 0,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Some pods have total restarts bigger than threshold",
|
||||
strategy: createStrategy(true, true, 1),
|
||||
expectedEvictedPodCount: 6,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Nine pods have total restarts equals threshold(includingInitContainers=true), 6 pods evictions",
|
||||
strategy: createStrategy(true, true, 1*25),
|
||||
expectedEvictedPodCount: 6,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 5 pods evictions",
|
||||
strategy: createStrategy(true, false, 1*25),
|
||||
expectedEvictedPodCount: 5,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "All pods have total restarts equals threshold(includingInitContainers=true), 6 pods evictions",
|
||||
strategy: createStrategy(true, true, 1*20),
|
||||
expectedEvictedPodCount: 6,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Nine pods have total restarts equals threshold(includingInitContainers=false), 6 pods evictions",
|
||||
strategy: createStrategy(true, false, 1*20),
|
||||
expectedEvictedPodCount: 6,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Five pods have total restarts bigger than threshold(includingInitContainers=true), but only 1 pod eviction",
|
||||
strategy: createStrategy(true, true, 5*25+1),
|
||||
expectedEvictedPodCount: 1,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Five pods have total restarts bigger than threshold(includingInitContainers=false), but only 1 pod eviction",
|
||||
strategy: createStrategy(true, false, 5*20+1),
|
||||
expectedEvictedPodCount: 1,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "All pods have total restarts equals threshold(maxPodsToEvict=3), 3 pods evictions",
|
||||
strategy: createStrategy(true, true, 1),
|
||||
expectedEvictedPodCount: 3,
|
||||
maxPodsToEvict: 3,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
|
||||
ds := options.DeschedulerServer{
|
||||
DeschedulerConfiguration: componentconfig.DeschedulerConfiguration{
|
||||
MaxNoOfPodsToEvictPerNode: tc.maxPodsToEvict,
|
||||
},
|
||||
Client: fakeClient,
|
||||
}
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
tc.maxPodsToEvict,
|
||||
[]*v1.Node{node},
|
||||
)
|
||||
|
||||
removePodsHavingTooManyRestarts(&ds, tc.strategy, []*v1.Node{node}, podEvictor, ds.EvictLocalStoragePods)
|
||||
actualEvictedPodCount := podEvictor.TotalEvicted()
|
||||
if actualEvictedPodCount != tc.expectedEvictedPodCount {
|
||||
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user