From 27fec39c655e2fc6f68cf20ac4004de3c6600e41 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Mon, 20 Sep 2021 11:20:54 -0400 Subject: [PATCH 1/4] RemoveFailedPods: guard against nil descheduler strategy (e.g. in case of default that loads all strategies) --- pkg/descheduler/strategies/failedpods.go | 12 +++++++++--- pkg/descheduler/strategies/failedpods_test.go | 8 ++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index e9c7ad23c..e21e82a79 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -19,7 +19,7 @@ import ( // validatedFailedPodsStrategyParams contains validated strategy parameters type validatedFailedPodsStrategyParams struct { - *validation.ValidatedStrategyParams + validation.ValidatedStrategyParams includingInitContainers bool reasons sets.String excludeOwnerKinds sets.String @@ -46,9 +46,15 @@ func RemoveFailedPods( evictions.WithLabelSelector(strategyParams.LabelSelector), ) + var labelSelector *metav1.LabelSelector + if strategy.Params != nil { + labelSelector = strategy.Params.LabelSelector + } + for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase=" + string(v1.PodFailed) + pods, err := podutil.ListPodsOnANodeWithFieldSelector( ctx, client, @@ -57,7 +63,7 @@ func RemoveFailedPods( podutil.WithFilter(evictable.IsEvictable), podutil.WithNamespaces(strategyParams.IncludedNamespaces.UnsortedList()), podutil.WithoutNamespaces(strategyParams.ExcludedNamespaces.UnsortedList()), - podutil.WithLabelSelector(strategy.Params.LabelSelector), + podutil.WithLabelSelector(labelSelector), ) if err != nil { klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node)) @@ -103,7 +109,7 @@ func validateAndParseRemoveFailedPodsParams( } return &validatedFailedPodsStrategyParams{ - ValidatedStrategyParams: strategyParams, + ValidatedStrategyParams: *strategyParams, includingInitContainers: includingInitContainers, reasons: reasons, excludeOwnerKinds: excludeOwnerKinds, diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/descheduler/strategies/failedpods_test.go index 52f4f0574..c5caf5a43 100644 --- a/pkg/descheduler/strategies/failedpods_test.go +++ b/pkg/descheduler/strategies/failedpods_test.go @@ -44,6 +44,13 @@ func TestRemoveFailedPods(t *testing.T) { expectedEvictedPodCount int pods []v1.Pod }{ + { + description: "default empty strategy, 0 failures, 0 evictions", + strategy: api.DeschedulerStrategy{}, + nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, + expectedEvictedPodCount: 0, + pods: []v1.Pod{}, // no pods come back with field selector phase=Failed + }, { description: "0 failures, 0 evictions", strategy: createStrategy(true, false, nil, nil, nil, false), @@ -225,6 +232,7 @@ func TestValidRemoveFailedPodsParams(t *testing.T) { params *api.StrategyParameters }{ {name: "validate nil params", params: nil}, + {name: "validate empty params", params: &api.StrategyParameters{}}, {name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ Reasons: []string{"CreateContainerConfigError"}, }}}, From 8e1da960821444ebf4aaef6ab4e4dc0917e55890 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Mon, 20 Sep 2021 18:18:47 -0400 Subject: [PATCH 2/4] e2e TestTopologySpreadConstraint: ensure pods are running before checking for topology spread across domains --- test/e2e/e2e_topologyspreadconstraint_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index ad814e027..efbb6bef9 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -119,6 +119,9 @@ func TestTopologySpreadConstraint(t *testing.T) { t.Fatalf("Pods were not evicted for %s TopologySpreadConstraint", name) } + // Ensure recently evicted Pod are rescheduled and running before asserting for a balanced topology spread + waitForRCPodsRunning(ctx, t, clientSet, rc) + pods, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", tc.labelKey, tc.labelValue)}) if err != nil { t.Errorf("Error listing pods for %s: %v", name, err) From 41fca2ed61223b815ef8872bd7523594fdfd7a03 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Sat, 25 Sep 2021 00:45:41 -0400 Subject: [PATCH 3/4] e2e tests for RemoveFailedPods strategy Fix priority class default --- pkg/descheduler/strategies/failedpods.go | 4 +- .../strategies/validation/strategyparams.go | 12 +- test/e2e/e2e_failedpods_test.go | 154 ++++++++++++++++++ test/e2e/e2e_test.go | 32 ++-- test/e2e/e2e_topologyspreadconstraint_test.go | 19 +-- 5 files changed, 184 insertions(+), 37 deletions(-) create mode 100644 test/e2e/e2e_failedpods_test.go diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index e21e82a79..74d79956c 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -90,7 +90,9 @@ func validateAndParseRemoveFailedPodsParams( params *api.StrategyParameters, ) (*validatedFailedPodsStrategyParams, error) { if params == nil { - return &validatedFailedPodsStrategyParams{}, nil + return &validatedFailedPodsStrategyParams{ + ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(), + }, nil } strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params) diff --git a/pkg/descheduler/strategies/validation/strategyparams.go b/pkg/descheduler/strategies/validation/strategyparams.go index cd6d4570a..33446e6fa 100644 --- a/pkg/descheduler/strategies/validation/strategyparams.go +++ b/pkg/descheduler/strategies/validation/strategyparams.go @@ -22,20 +22,22 @@ type ValidatedStrategyParams struct { NodeFit bool } +func DefaultValidatedStrategyParams() ValidatedStrategyParams { + return ValidatedStrategyParams{ThresholdPriority: utils.SystemCriticalPriority} +} + func ValidateAndParseStrategyParams( ctx context.Context, client clientset.Interface, params *api.StrategyParameters, ) (*ValidatedStrategyParams, error) { - var includedNamespaces, excludedNamespaces sets.String if params == nil { - return &ValidatedStrategyParams{ - IncludedNamespaces: includedNamespaces, - ExcludedNamespaces: excludedNamespaces, - }, nil + defaultValidatedStrategyParams := DefaultValidatedStrategyParams() + return &defaultValidatedStrategyParams, nil } // At most one of include/exclude can be set + var includedNamespaces, excludedNamespaces sets.String if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set") } diff --git a/test/e2e/e2e_failedpods_test.go b/test/e2e/e2e_failedpods_test.go new file mode 100644 index 000000000..4b4a9665f --- /dev/null +++ b/test/e2e/e2e_failedpods_test.go @@ -0,0 +1,154 @@ +package e2e + +import ( + "context" + batchv1 "k8s.io/api/batch/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "strings" + "testing" + "time" + + deschedulerapi "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/strategies" +) + +var oneHourPodLifetimeSeconds uint = 3600 + +func TestFailedPods(t *testing.T) { + ctx := context.Background() + clientSet, _, stopCh := initializeClient(t) + defer close(stopCh) + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + t.Errorf("Error listing node with %v", err) + } + nodes, _ := splitNodesAndWorkerNodes(nodeList.Items) + t.Log("Creating testing namespace") + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + testCases := map[string]struct { + expectedEvictedCount int + strategyParams *deschedulerapi.StrategyParameters + }{ + "test-failed-pods-nil-strategy": { + expectedEvictedCount: 1, + strategyParams: nil, + }, + "test-failed-pods-default-strategy": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{}, + }, + "test-failed-pods-default-failed-pods": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{}, + }, + }, + "test-failed-pods-reason-unmatched": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{Reasons: []string{"ReasonDoesNotMatch"}}, + }, + }, + "test-failed-pods-min-age-unmet": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{MinPodLifetimeSeconds: &oneHourPodLifetimeSeconds}, + }, + }, + "test-failed-pods-exclude-job-kind": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{ExcludeOwnerKinds: []string{"Job"}}, + }, + }, + } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + job := initFailedJob(name, testNamespace.Namespace) + t.Logf("Creating job %s in %s namespace", job.Name, job.Namespace) + jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) + if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating Job %s: %v", name, err) + } + deletePropagationPolicy := metav1.DeletePropagationForeground + defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) + + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + + t.Logf("Running RemoveFailedPods strategy for %s", name) + strategies.RemoveFailedPods( + ctx, + clientSet, + deschedulerapi.DeschedulerStrategy{ + Enabled: true, + Params: tc.strategyParams, + }, + nodes, + podEvictor, + ) + t.Logf("Finished RemoveFailedPods strategy for %s", name) + + if actualEvictedCount := podEvictor.TotalEvicted(); actualEvictedCount == tc.expectedEvictedCount { + t.Logf("Total of %d Pods were evicted for %s", actualEvictedCount, name) + } else { + t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedCount, tc.expectedEvictedCount) + } + }) + } +} + +func initFailedJob(name, namespace string) *batchv1.Job { + podSpec := MakePodSpec("", nil) + podSpec.Containers[0].Command = []string{"/bin/false"} + podSpec.RestartPolicy = v1.RestartPolicyNever + labelsSet := labels.Set{"test": name, "name": name} + jobBackoffLimit := int32(0) + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelsSet, + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: podSpec, + ObjectMeta: metav1.ObjectMeta{Labels: labelsSet}, + }, + BackoffLimit: &jobBackoffLimit, + }, + } +} + +func waitForJobPodPhase(ctx context.Context, t *testing.T, clientSet clientset.Interface, job *batchv1.Job, phase v1.PodPhase) { + podClient := clientSet.CoreV1().Pods(job.Namespace) + if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { + t.Log(labels.FormatLabels(job.Labels)) + if podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: labels.FormatLabels(job.Labels)}); err != nil { + return false, err + } else { + if len(podList.Items) == 0 { + t.Logf("Job controller has not created Pod for job %s yet", job.Name) + return false, nil + } + for _, pod := range podList.Items { + if pod.Status.Phase != phase { + t.Logf("Pod %v not in %s phase yet, is %v instead", pod.Name, phase, pod.Status.Phase) + return false, nil + } + } + t.Logf("Job %v Pod is in %s phase now", job.Name, phase) + return true, nil + } + }); err != nil { + t.Fatalf("Error waiting for pods in %s phase: %v", phase, err) + } +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 9a3008f2a..0c1d435d5 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -290,20 +290,7 @@ func TestLowNodeUtilization(t *testing.T) { waitForRCPodsRunning(ctx, t, clientSet, rc) // Run LowNodeUtilization strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("%v", err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podsOnMosttUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, workerNodes[0], podutil.WithFilter(podEvictor.Evictable().IsEvictable)) if err != nil { @@ -1293,3 +1280,20 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) { } return allNodes, workerNodes } + +func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor { + evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) + if err != nil || len(evictionPolicyGroupVersion) == 0 { + t.Fatalf("Error creating eviction policy group: %v", err) + } + return evictions.NewPodEvictor( + clientSet, + evictionPolicyGroupVersion, + false, + 0, + nodes, + true, + false, + false, + ) +} diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index efbb6bef9..d6e1c8b42 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -11,8 +11,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" deschedulerapi "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" "sigs.k8s.io/descheduler/pkg/descheduler/strategies" ) @@ -79,22 +77,9 @@ func TestTopologySpreadConstraint(t *testing.T) { defer deleteRC(ctx, t, clientSet, violatorRc) waitForRCPodsRunning(ctx, t, clientSet, violatorRc) - // Run TopologySpreadConstraint strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("Error creating eviction policy group for %s: %v", name, err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + // Run TopologySpreadConstraint strategy t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name) strategies.RemovePodsViolatingTopologySpreadConstraint( ctx, From 12e6bf931da26b45ba8b8d4980d63c7b8094a512 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Tue, 28 Sep 2021 10:55:01 -0400 Subject: [PATCH 4/4] Update Docs, Manifests, and Helm Chart version to 0.22.1 --- README.md | 6 +++--- charts/descheduler/Chart.yaml | 4 ++-- docs/user-guide.md | 1 + kubernetes/cronjob/cronjob.yaml | 2 +- kubernetes/deployment/deployment.yaml | 2 +- kubernetes/job/job.yaml | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e9a23ca03..36dce8aa0 100644 --- a/README.md +++ b/README.md @@ -103,17 +103,17 @@ See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/ku Run As A Job ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.1' | kubectl apply -f - ``` Run As A CronJob ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.1' | kubectl apply -f - ``` Run As A Deployment ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.1' | kubectl apply -f - ``` ## User Guide diff --git a/charts/descheduler/Chart.yaml b/charts/descheduler/Chart.yaml index b194a1b7e..dab332823 100644 --- a/charts/descheduler/Chart.yaml +++ b/charts/descheduler/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: descheduler -version: 0.22.0 -appVersion: 0.22.0 +version: 0.22.1 +appVersion: 0.22.1 description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that. keywords: - kubernetes diff --git a/docs/user-guide.md b/docs/user-guide.md index f4a4881e6..c7996020d 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -4,6 +4,7 @@ Starting with descheduler release v0.10.0 container images are available in the Descheduler Version | Container Image | Architectures | ------------------- |-----------------------------------------------------|-------------------------| +v0.22.1 | k8s.gcr.io/descheduler/descheduler:v0.22.1 | AMD64
ARM64
ARMv7 | v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64
ARM64
ARMv7 | v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64
ARM64
ARMv7 | v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64
ARM64 | diff --git a/kubernetes/cronjob/cronjob.yaml b/kubernetes/cronjob/cronjob.yaml index c38d52efa..8f30b5379 100644 --- a/kubernetes/cronjob/cronjob.yaml +++ b/kubernetes/cronjob/cronjob.yaml @@ -16,7 +16,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume diff --git a/kubernetes/deployment/deployment.yaml b/kubernetes/deployment/deployment.yaml index 478cc6907..82ff86806 100644 --- a/kubernetes/deployment/deployment.yaml +++ b/kubernetes/deployment/deployment.yaml @@ -19,7 +19,7 @@ spec: serviceAccountName: descheduler-sa containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 imagePullPolicy: IfNotPresent command: - "/bin/descheduler" diff --git a/kubernetes/job/job.yaml b/kubernetes/job/job.yaml index 9f28d2976..1c544fee5 100644 --- a/kubernetes/job/job.yaml +++ b/kubernetes/job/job.yaml @@ -14,7 +14,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume