1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #677 from ingvagabund/accumulated-eviction

Use a fake client when evicting pods by individual strategies to accumulate the evictions
This commit is contained in:
Kubernetes Prow Robot
2022-01-20 08:15:52 -08:00
committed by GitHub
16 changed files with 159 additions and 11 deletions

View File

@@ -21,11 +21,20 @@ import (
"fmt"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/klog/v2"
corev1informers "k8s.io/client-go/informers/core/v1"
schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
@@ -67,10 +76,92 @@ func Run(rs *options.DeschedulerServer) error {
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc)
func cachedClient(
realClient clientset.Interface,
podInformer corev1informers.PodInformer,
nodeInformer corev1informers.NodeInformer,
namespaceInformer corev1informers.NamespaceInformer,
priorityClassInformer schedulingv1informers.PriorityClassInformer,
) (clientset.Interface, error) {
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() == "eviction" {
createAct, matched := action.(core.CreateActionImpl)
if !matched {
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
}
eviction, matched := createAct.Object.(*policy.Eviction)
if !matched {
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
}
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
}
return true, nil, nil
}
// fallback to the default reactor
return false, nil, nil
})
klog.V(3).Infof("Pulling resources for the cached client from the cluster")
pods, err := podInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list pods: %v", err)
}
for _, item := range pods {
if _, err := fakeClient.CoreV1().Pods(item.Namespace).Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy pod: %v", err)
}
}
nodes, err := nodeInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list nodes: %v", err)
}
for _, item := range nodes {
if _, err := fakeClient.CoreV1().Nodes().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
namespaces, err := namespaceInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list namespaces: %v", err)
}
for _, item := range namespaces {
if _, err := fakeClient.CoreV1().Namespaces().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
priorityClasses, err := priorityClassInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list priorityclasses: %v", err)
}
for _, item := range priorityClasses {
if _, err := fakeClient.SchedulingV1().PriorityClasses().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy priorityclass: %v", err)
}
}
return fakeClient, nil
}
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error {
sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
podInformer := sharedInformerFactory.Core().V1().Pods()
namespaceInformer := sharedInformerFactory.Core().V1().Namespaces()
priorityClassInformer := sharedInformerFactory.Scheduling().V1().PriorityClasses()
// create the informers
namespaceInformer.Informer()
priorityClassInformer.Informer()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
@@ -138,8 +229,39 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
return
}
var podEvictorClient clientset.Interface
// When the dry mode is enable, collect all the relevant objects (mostly pods) under a fake client.
// So when evicting pods while running multiple strategies in a row have the cummulative effect
// as is when evicting pods for real.
if rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient, err := cachedClient(rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer)
if err != nil {
klog.Error(err)
return
}
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods())
if err != nil {
klog.Errorf("build get pods assigned to node function error: %v", err)
return
}
fakeCtx, cncl := context.WithCancel(context.TODO())
defer cncl()
fakeSharedInformerFactory.Start(fakeCtx.Done())
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
podEvictorClient = fakeClient
} else {
podEvictorClient = rs.Client
}
klog.V(3).Infof("Building a pod evictor")
podEvictor := evictions.NewPodEvictor(
rs.Client,
podEvictorClient,
evictionPolicyGroupVersion,
rs.DryRun,
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
@@ -149,6 +271,7 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
evictSystemCriticalPods,
ignorePvcPods,
evictBarePods,
!rs.DisableMetrics,
)
for name, strategy := range deschedulerPolicy.Strategies {

View File

@@ -61,6 +61,7 @@ type PodEvictor struct {
evictLocalStoragePods bool
evictSystemCriticalPods bool
ignorePvcPods bool
metricsEnabled bool
}
func NewPodEvictor(
@@ -74,6 +75,7 @@ func NewPodEvictor(
evictSystemCriticalPods bool,
ignorePvcPods bool,
evictFailedBarePods bool,
metricsEnabled bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
var namespacePodCount = make(namespacePodEvictCount)
@@ -95,6 +97,7 @@ func NewPodEvictor(
evictSystemCriticalPods: evictSystemCriticalPods,
evictFailedBarePods: evictFailedBarePods,
ignorePvcPods: ignorePvcPods,
metricsEnabled: metricsEnabled,
}
}
@@ -121,20 +124,26 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node,
reason += " (" + strings.Join(reasons, ", ") + ")"
}
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[node]+1 > *pe.maxPodsToEvictPerNode {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name)
}
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun)
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
if err != nil {
// err is used only for logging purposes
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, nil
}
@@ -149,15 +158,14 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node,
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
}
return true, nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) error {
if dryRun {
return nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
eviction := &policy.Eviction{

View File

@@ -62,7 +62,7 @@ func TestEvictPod(t *testing.T) {
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: test.pods}, nil
})
got := evictPod(ctx, fakeClient, test.pod, "v1", false)
got := evictPod(ctx, fakeClient, test.pod, "v1")
if got != test.want {
t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got)
}

View File

@@ -301,6 +301,7 @@ func TestFindDuplicatePods(t *testing.T) {
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
@@ -727,6 +728,7 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -265,6 +265,7 @@ func TestRemoveFailedPods(t *testing.T) {
false,
false,
false,
false,
)
RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -226,6 +226,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -263,6 +263,7 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
tc.evictSystemCriticalPods,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{

View File

@@ -467,6 +467,7 @@ func TestHighNodeUtilization(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
@@ -671,6 +672,7 @@ func TestHighNodeUtilizationWithTaints(t *testing.T) {
false,
false,
false,
false,
)
HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -724,6 +724,7 @@ func TestLowNodeUtilization(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
@@ -1036,6 +1037,7 @@ func TestLowNodeUtilizationWithTaints(t *testing.T) {
false,
false,
false,
false,
)
LowNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -213,6 +213,7 @@ func TestPodAntiAffinity(t *testing.T) {
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Params: &api.StrategyParameters{

View File

@@ -302,6 +302,7 @@ func TestPodLifeTime(t *testing.T) {
false,
tc.ignorePvcPods,
false,
false,
)
PodLifeTime(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -238,6 +238,7 @@ func TestRemovePodsHavingTooManyRestarts(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsHavingTooManyRestarts(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)

View File

@@ -906,6 +906,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
false,
false,
false,
false,
)
RemovePodsViolatingTopologySpreadConstraint(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()

View File

@@ -151,6 +151,7 @@ func TestRemoveDuplicates(t *testing.T) {
false,
false,
false,
false,
)
t.Log("Running DeschedulerStrategy strategy")

View File

@@ -203,6 +203,7 @@ func runPodLifetimeStrategy(
evictCritical,
false,
false,
false,
),
getPodsAssignedToNode,
)
@@ -1341,5 +1342,6 @@ func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*
false,
false,
false,
false,
)
}

View File

@@ -141,6 +141,7 @@ func TestTooManyRestarts(t *testing.T) {
false,
false,
false,
false,
)
// Run RemovePodsHavingTooManyRestarts strategy
t.Log("Running RemovePodsHavingTooManyRestarts strategy")