From 4819ab9c6992e1e4952811e068985cdd6d6f8058 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 12 May 2020 00:10:26 -0500 Subject: [PATCH 1/4] Add namespace to pod eviction log messages In multi-tenant environments it is useful to know which namespace a pod was evicted from. Therefore log the namespace when evicting pods. Also, do not log a nil error when successfully evicting pods. --- pkg/descheduler/evictions/evictions.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 2e400fac6..d234c833c 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -91,11 +91,11 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) success, err = EvictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun) if success { pe.nodepodCount[node]++ - klog.V(1).Infof("Evicted pod: %#v (%#v)", pod.Name, err) + klog.V(1).Infof("Evicted pod: %#v in namespace %#v", pod.Name, pod.Namespace) return success, nil } // err is used only for logging purposes - klog.Errorf("Error when evicting pod: %#v (%#v)", pod.Name, err) + klog.Errorf("Error evicting pod: %#v in namespace %#v (%#v)", pod.Name, pod.Namespace, err) return false, nil } From cff984261e4ebe968d3c96bf4dc5c22e86fa87a6 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Tue, 12 May 2020 00:18:20 -0500 Subject: [PATCH 2/4] Log an error when EvictPod method returns a non-nil error End users should be able to see the detailed error from the EvictPod method when it fails. Updates all strategies to log the error. The PodLifeTime strategy already logs this error. --- pkg/descheduler/strategies/duplicates.go | 3 ++- pkg/descheduler/strategies/lownodeutilization.go | 1 + pkg/descheduler/strategies/node_affinity.go | 4 +++- pkg/descheduler/strategies/node_taint.go | 4 +++- pkg/descheduler/strategies/pod_antiaffinity.go | 1 + pkg/descheduler/strategies/toomanyrestarts.go | 4 +++- 6 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pkg/descheduler/strategies/duplicates.go b/pkg/descheduler/strategies/duplicates.go index 2bc6541ac..258521f3c 100644 --- a/pkg/descheduler/strategies/duplicates.go +++ b/pkg/descheduler/strategies/duplicates.go @@ -20,7 +20,7 @@ import ( "context" "strings" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" @@ -49,6 +49,7 @@ func RemoveDuplicatePods( // i = 0 does not evict the first pod for i := 1; i < len(pods); i++ { if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } } diff --git a/pkg/descheduler/strategies/lownodeutilization.go b/pkg/descheduler/strategies/lownodeutilization.go index 9fed3fdfe..0fd2148e4 100644 --- a/pkg/descheduler/strategies/lownodeutilization.go +++ b/pkg/descheduler/strategies/lownodeutilization.go @@ -262,6 +262,7 @@ func evictPods( success, err := podEvictor.EvictPod(ctx, pod, node) if err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go index fc122d717..8a687f34c 100644 --- a/pkg/descheduler/strategies/node_affinity.go +++ b/pkg/descheduler/strategies/node_affinity.go @@ -18,7 +18,8 @@ package strategies import ( "context" - "k8s.io/api/core/v1" + + v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" @@ -47,6 +48,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) { klog.V(1).Infof("Evicting pod: %v", pod.Name) if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } } diff --git a/pkg/descheduler/strategies/node_taint.go b/pkg/descheduler/strategies/node_taint.go index 691742f3c..2c1dc1989 100644 --- a/pkg/descheduler/strategies/node_taint.go +++ b/pkg/descheduler/strategies/node_taint.go @@ -18,12 +18,13 @@ package strategies import ( "context" + "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" "sigs.k8s.io/descheduler/pkg/utils" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" ) @@ -46,6 +47,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa ) { klog.V(2).Infof("Not all taints with NoSchedule effect are tolerated after update for pod %v on node %v", pods[i].Name, node.Name) if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } } diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go index 7bfadada1..e5a04e834 100644 --- a/pkg/descheduler/strategies/pod_antiaffinity.go +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -42,6 +42,7 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients if checkPodsWithAntiAffinityExist(pods[i], pods) { success, err := podEvictor.EvictPod(ctx, pods[i], node) if err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } diff --git a/pkg/descheduler/strategies/toomanyrestarts.go b/pkg/descheduler/strategies/toomanyrestarts.go index ba5f5fcc0..ff68bb977 100644 --- a/pkg/descheduler/strategies/toomanyrestarts.go +++ b/pkg/descheduler/strategies/toomanyrestarts.go @@ -18,7 +18,8 @@ package strategies import ( "context" - "k8s.io/api/core/v1" + + v1 "k8s.io/api/core/v1" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog" @@ -53,6 +54,7 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter continue } if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) break } } From 7039b6c8aafbad333fa21b3604ba1dae0a7f9b15 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Thu, 14 May 2020 00:11:54 -0500 Subject: [PATCH 3/4] Refactor function EvictPod to only return an error The EvictPod function previously returned a bool and an error. The function now only returns an error. Callers can check for failure by testing if the returned error is not nil. This aligns the EvictPod function with idiomatic Go best practices. Also, the function name has been changed from EvictPod to evictPod because it is not used outside the evictions package. --- pkg/descheduler/evictions/evictions.go | 31 +++++++++++---------- pkg/descheduler/evictions/evictions_test.go | 10 +++---- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index d234c833c..131ed602c 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -83,25 +83,26 @@ func (pe *PodEvictor) TotalEvicted() int { // EvictPod returns non-nil error only when evicting a pod on a node is not // possible (due to maxPodsToEvict constraint). Success is true when the pod // is evicted on the server side. -func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (success bool, err error) { +func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (bool, error) { if pe.maxPodsToEvict > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvict { return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvict, node.Name) } - success, err = EvictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun) - if success { - pe.nodepodCount[node]++ - klog.V(1).Infof("Evicted pod: %#v in namespace %#v", pod.Name, pod.Namespace) - return success, nil + err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun) + if err != nil { + // err is used only for logging purposes + klog.Errorf("Error evicting pod: %#v in namespace %#v (%#v)", pod.Name, pod.Namespace, err) + return false, nil } - // err is used only for logging purposes - klog.Errorf("Error evicting pod: %#v in namespace %#v (%#v)", pod.Name, pod.Namespace, err) - return false, nil + + pe.nodepodCount[node]++ + klog.V(1).Infof("Evicted pod: %#v in namespace %#v", pod.Name, pod.Namespace) + return true, nil } -func EvictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) (bool, error) { +func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) error { if dryRun { - return true, nil + return nil } deleteOptions := &metav1.DeleteOptions{} // GracePeriodSeconds ? @@ -124,13 +125,13 @@ func EvictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: client.CoreV1().Events(pod.Namespace)}) r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"}) r.Event(pod, v1.EventTypeNormal, "Descheduled", "pod evicted by sigs.k8s.io/descheduler") - return true, nil + return nil } if apierrors.IsTooManyRequests(err) { - return false, fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err) + return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err) } if apierrors.IsNotFound(err) { - return false, fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err) + return fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err) } - return false, err + return err } diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index a1e87f8fe..0d6bf1bb8 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -20,7 +20,7 @@ import ( "context" "testing" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" @@ -36,21 +36,21 @@ func TestEvictPod(t *testing.T) { node *v1.Node pod *v1.Pod pods []v1.Pod - want bool + want error }{ { description: "test pod eviction - pod present", node: node1, pod: pod1, pods: []v1.Pod{*pod1}, - want: true, + want: nil, }, { description: "test pod eviction - pod absent", node: node1, pod: pod1, pods: []v1.Pod{*test.BuildTestPod("p2", 400, 0, "node1", nil), *test.BuildTestPod("p3", 450, 0, "node1", nil)}, - want: true, + want: nil, }, } @@ -59,7 +59,7 @@ func TestEvictPod(t *testing.T) { fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { return true, &v1.PodList{Items: test.pods}, nil }) - got, _ := EvictPod(ctx, fakeClient, test.pod, "v1", false) + got := evictPod(ctx, fakeClient, test.pod, "v1", false) if got != test.want { t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got) } From 55afde6251c38942027e23f7b3d7cae20c34b095 Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Thu, 14 May 2020 00:36:52 -0500 Subject: [PATCH 4/4] Remove line break in log message from PodLifeTime strategy --- pkg/descheduler/strategies/pod_lifetime.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index 41a08ad17..6e1d1c695 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -18,6 +18,7 @@ package strategies import ( "context" + v1 "k8s.io/api/core/v1" v1meta "k8s.io/apimachinery/pkg/apis/meta/v1" clientset "k8s.io/client-go/kubernetes" @@ -41,7 +42,7 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D for _, pod := range pods { success, err := podEvictor.EvictPod(ctx, pod, node) if success { - klog.V(1).Infof("Evicted pod: %#v\n because it was created more than %v seconds ago", pod.Name, *strategy.Params.MaxPodLifeTimeSeconds) + klog.V(1).Infof("Evicted pod: %#v because it was created more than %v seconds ago", pod.Name, *strategy.Params.MaxPodLifeTimeSeconds) } if err != nil {