diff --git a/test/e2e/e2e_failedpods_test.go b/test/e2e/e2e_failedpods_test.go index 7ff28956e..9a892315e 100644 --- a/test/e2e/e2e_failedpods_test.go +++ b/test/e2e/e2e_failedpods_test.go @@ -11,122 +11,209 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" componentbaseconfig "k8s.io/component-base/config" utilptr "k8s.io/utils/ptr" + "sigs.k8s.io/descheduler/pkg/api" + apiv1alpha2 "sigs.k8s.io/descheduler/pkg/api/v1alpha2" "sigs.k8s.io/descheduler/pkg/descheduler/client" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" "sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor" "sigs.k8s.io/descheduler/pkg/framework/plugins/removefailedpods" - frameworktesting "sigs.k8s.io/descheduler/pkg/framework/testing" - frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" ) -var oneHourPodLifetimeSeconds uint = 3600 +var ( + oneHourPodLifetimeSeconds uint = 3600 + oneSecondPodLifetimeSeconds uint = 1 +) + +func removeFailedPodsPolicy(removeFailedPodsArgs *removefailedpods.RemoveFailedPodsArgs, evictorArgs *defaultevictor.DefaultEvictorArgs) *apiv1alpha2.DeschedulerPolicy { + return &apiv1alpha2.DeschedulerPolicy{ + Profiles: []apiv1alpha2.DeschedulerProfile{ + { + Name: removefailedpods.PluginName + "Profile", + PluginConfigs: []apiv1alpha2.PluginConfig{ + { + Name: removefailedpods.PluginName, + Args: runtime.RawExtension{ + Object: removeFailedPodsArgs, + }, + }, + { + Name: defaultevictor.PluginName, + Args: runtime.RawExtension{ + Object: evictorArgs, + }, + }, + }, + Plugins: apiv1alpha2.Plugins{ + Filter: apiv1alpha2.PluginSet{ + Enabled: []string{ + defaultevictor.PluginName, + }, + }, + Deschedule: apiv1alpha2.PluginSet{ + Enabled: []string{ + removefailedpods.PluginName, + }, + }, + }, + }, + }, + } +} func TestFailedPods(t *testing.T) { ctx := context.Background() clientSet, err := client.CreateClient(componentbaseconfig.ClientConnectionConfiguration{Kubeconfig: os.Getenv("KUBECONFIG")}, "") if err != nil { - t.Errorf("Error during client creation with %v", err) + t.Errorf("Error during kubernetes client creation with %v", err) } - nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) - if err != nil { - t.Errorf("Error listing node with %v", err) - } - nodes, _ := splitNodesAndWorkerNodes(nodeList.Items) t.Log("Creating testing namespace") testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { t.Fatalf("Unable to create ns %v", testNamespace.Name) } defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) - testCases := map[string]struct { - expectedEvictedCount uint - args *removefailedpods.RemoveFailedPodsArgs + + tests := []struct { + name string + expectedEvictedPodCount int + removeFailedPodsArgs *removefailedpods.RemoveFailedPodsArgs }{ - "test-failed-pods-default-args": { - expectedEvictedCount: 1, - args: &removefailedpods.RemoveFailedPodsArgs{}, - }, - "test-failed-pods-reason-unmatched": { - expectedEvictedCount: 0, - args: &removefailedpods.RemoveFailedPodsArgs{ - Reasons: []string{"ReasonDoesNotMatch"}, + { + name: "test-failed-pods-default-args", + expectedEvictedPodCount: 1, + removeFailedPodsArgs: &removefailedpods.RemoveFailedPodsArgs{ + MinPodLifetimeSeconds: &oneSecondPodLifetimeSeconds, }, }, - "test-failed-pods-min-age-unmet": { - expectedEvictedCount: 0, - args: &removefailedpods.RemoveFailedPodsArgs{ + { + name: "test-failed-pods-reason-unmatched", + expectedEvictedPodCount: 0, + removeFailedPodsArgs: &removefailedpods.RemoveFailedPodsArgs{ + Reasons: []string{"ReasonDoesNotMatch"}, + MinPodLifetimeSeconds: &oneSecondPodLifetimeSeconds, + }, + }, + { + name: "test-failed-pods-min-age-unmet", + expectedEvictedPodCount: 0, + removeFailedPodsArgs: &removefailedpods.RemoveFailedPodsArgs{ MinPodLifetimeSeconds: &oneHourPodLifetimeSeconds, }, }, - "test-failed-pods-exclude-job-kind": { - expectedEvictedCount: 0, - args: &removefailedpods.RemoveFailedPodsArgs{ - ExcludeOwnerKinds: []string{"Job"}, + { + name: "test-failed-pods-exclude-job-kind", + expectedEvictedPodCount: 0, + removeFailedPodsArgs: &removefailedpods.RemoveFailedPodsArgs{ + ExcludeOwnerKinds: []string{"Job"}, + MinPodLifetimeSeconds: &oneSecondPodLifetimeSeconds, }, }, } - for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - job := initFailedJob(name, testNamespace.Namespace) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + job := initFailedJob(tc.name, testNamespace.Namespace) t.Logf("Creating job %s in %s namespace", job.Name, job.Namespace) jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { - t.Fatalf("Error creating Job %s: %v", name, err) + t.Fatalf("Error creating Job %s: %v", tc.name, err) } deletePropagationPolicy := metav1.DeletePropagationForeground - defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + defer func() { + jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForPodsToDisappear(ctx, t, clientSet, job.Labels, job.Namespace) + }() waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("Error detecting eviction policy group: %v", err) + preRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) + + // Deploy the descheduler with the configured policy + evictorArgs := &defaultevictor.DefaultEvictorArgs{ + EvictLocalStoragePods: true, + EvictSystemCriticalPods: false, + IgnorePvcPods: false, + EvictFailedBarePods: false, + } + tc.removeFailedPodsArgs.Namespaces = &api.Namespaces{ + Include: []string{testNamespace.Name}, } - handle, podEvictor, err := frameworktesting.InitFrameworkHandle( - ctx, - clientSet, - evictions.NewOptions(). - WithPolicyGroupVersion(evictionPolicyGroupVersion), - defaultevictor.DefaultEvictorArgs{ - EvictLocalStoragePods: true, - }, - nil, - ) + deschedulerPolicyConfigMapObj, err := deschedulerPolicyConfigMap(removeFailedPodsPolicy(tc.removeFailedPodsArgs, evictorArgs)) if err != nil { - t.Fatalf("Unable to initialize a framework handle: %v", err) + t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err) } - t.Logf("Running RemoveFailedPods strategy for %s", name) - - plugin, err := removefailedpods.New(&removefailedpods.RemoveFailedPodsArgs{ - Reasons: tc.args.Reasons, - MinPodLifetimeSeconds: tc.args.MinPodLifetimeSeconds, - IncludingInitContainers: tc.args.IncludingInitContainers, - ExcludeOwnerKinds: tc.args.ExcludeOwnerKinds, - LabelSelector: tc.args.LabelSelector, - Namespaces: tc.args.Namespaces, - }, - handle, - ) + t.Logf("Creating %q policy CM with RemoveDuplicates configured...", deschedulerPolicyConfigMapObj.Name) + _, err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Create(ctx, deschedulerPolicyConfigMapObj, metav1.CreateOptions{}) if err != nil { - t.Fatalf("Unable to initialize the plugin: %v", err) + t.Fatalf("Error creating %q CM: %v", deschedulerPolicyConfigMapObj.Name, err) } - plugin.(frameworktypes.DeschedulePlugin).Deschedule(ctx, nodes) - t.Logf("Finished RemoveFailedPods strategy for %s", name) + defer func() { + t.Logf("Deleting %q CM...", deschedulerPolicyConfigMapObj.Name) + err = clientSet.CoreV1().ConfigMaps(deschedulerPolicyConfigMapObj.Namespace).Delete(ctx, deschedulerPolicyConfigMapObj.Name, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Unable to delete %q CM: %v", deschedulerPolicyConfigMapObj.Name, err) + } + }() - if actualEvictedCount := podEvictor.TotalEvicted(); actualEvictedCount == tc.expectedEvictedCount { - t.Logf("Total of %d Pods were evicted for %s", actualEvictedCount, name) + deschedulerDeploymentObj := deschedulerDeployment(testNamespace.Name) + t.Logf("Creating descheduler deployment %v", deschedulerDeploymentObj.Name) + _, err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Create(ctx, deschedulerDeploymentObj, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Error creating %q deployment: %v", deschedulerDeploymentObj.Name, err) + } + + deschedulerPodName := "" + defer func() { + if deschedulerPodName != "" { + printPodLogs(ctx, t, clientSet, deschedulerPodName) + } + + t.Logf("Deleting %q deployment...", deschedulerDeploymentObj.Name) + err = clientSet.AppsV1().Deployments(deschedulerDeploymentObj.Namespace).Delete(ctx, deschedulerDeploymentObj.Name, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Unable to delete %q deployment: %v", deschedulerDeploymentObj.Name, err) + } + + waitForPodsToDisappear(ctx, t, clientSet, deschedulerDeploymentObj.Labels, deschedulerDeploymentObj.Namespace) + }() + + t.Logf("Waiting for the descheduler pod running") + deschedulerPods := waitForPodsRunning(ctx, t, clientSet, deschedulerDeploymentObj.Labels, 1, deschedulerDeploymentObj.Namespace) + if len(deschedulerPods) != 0 { + deschedulerPodName = deschedulerPods[0].Name + } + + // Run RemoveDuplicates strategy + var meetsExpectations bool + var actualEvictedPodCount int + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { + currentRunNames := sets.NewString(getCurrentPodNames(ctx, clientSet, testNamespace.Name, t)...) + actualEvictedPod := preRunNames.Difference(currentRunNames) + actualEvictedPodCount = actualEvictedPod.Len() + t.Logf("preRunNames: %v, currentRunNames: %v, actualEvictedPodCount: %v\n", preRunNames.List(), currentRunNames.List(), actualEvictedPodCount) + if actualEvictedPodCount != tc.expectedEvictedPodCount { + t.Logf("Expecting %v number of pods evicted, got %v instead", tc.expectedEvictedPodCount, actualEvictedPodCount) + return false, nil + } + meetsExpectations = true + return true, nil + }); err != nil { + t.Errorf("Error waiting for descheduler running: %v", err) + } + + if !meetsExpectations { + t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedPodCount, tc.expectedEvictedPodCount) } else { - t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedCount, tc.expectedEvictedCount) + t.Logf("Total of %d Pods were evicted for %s", actualEvictedPodCount, tc.name) } }) }