From 311d75223fa786190957ab3b8041b196631e9dd1 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Sat, 4 Jun 2022 22:24:44 -0400 Subject: [PATCH] PodLifeTime: sort pods by creation timestamp --- pkg/descheduler/pod/pods.go | 7 ++ pkg/descheduler/pod/pods_test.go | 27 +++++++ pkg/descheduler/strategies/pod_lifetime.go | 27 ++++--- .../strategies/pod_lifetime_test.go | 58 ++++++++++++-- test/e2e/e2e_test.go | 79 ++++++++++++++++--- 5 files changed, 172 insertions(+), 26 deletions(-) diff --git a/pkg/descheduler/pod/pods.go b/pkg/descheduler/pod/pods.go index ef3f0ab32..15390ec43 100644 --- a/pkg/descheduler/pod/pods.go +++ b/pkg/descheduler/pod/pods.go @@ -228,3 +228,10 @@ func SortPodsBasedOnPriorityLowToHigh(pods []*v1.Pod) { return *pods[i].Spec.Priority < *pods[j].Spec.Priority }) } + +// SortPodsBasedOnAge sorts Pods from oldest to most recent in place +func SortPodsBasedOnAge(pods []*v1.Pod) { + sort.Slice(pods, func(i, j int) bool { + return pods[i].CreationTimestamp.Before(&pods[j].CreationTimestamp) + }) +} diff --git a/pkg/descheduler/pod/pods_test.go b/pkg/descheduler/pod/pods_test.go index beb102c6c..e5b9005a6 100644 --- a/pkg/descheduler/pod/pods_test.go +++ b/pkg/descheduler/pod/pods_test.go @@ -18,8 +18,11 @@ package pod import ( "context" + "fmt" + "math/rand" "reflect" "testing" + "time" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -154,3 +157,27 @@ func TestSortPodsBasedOnPriorityLowToHigh(t *testing.T) { t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1]) } } + +func TestSortPodsBasedOnAge(t *testing.T) { + podList := make([]*v1.Pod, 9) + n1 := test.BuildTestNode("n1", 4000, 3000, int64(len(podList)), nil) + + for i := 0; i < len(podList); i++ { + podList[i] = test.BuildTestPod(fmt.Sprintf("p%d", i), 1, 32, n1.Name, func(pod *v1.Pod) { + creationTimestamp := metav1.Now().Add(time.Minute * time.Duration(i)) + pod.ObjectMeta.SetCreationTimestamp(metav1.NewTime(creationTimestamp)) + }) + } + + rand.Seed(time.Now().UnixNano()) + rand.Shuffle(len(podList), func(i, j int) { podList[i], podList[j] = podList[j], podList[i] }) + + SortPodsBasedOnAge(podList) + + for i := 0; i < len(podList); i++ { + expectedName := fmt.Sprintf("p%d", i) + if podList[i].GetName() != expectedName { + t.Errorf("Expected pod %s to be at index %d", expectedName, i) + } + } +} diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go index bf31892da..fa5e91f47 100644 --- a/pkg/descheduler/strategies/pod_lifetime.go +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -100,22 +100,31 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D return } + podsToEvict := make([]*v1.Pod, 0) + nodeMap := make(map[string]*v1.Node, len(nodes)) + for _, node := range nodes { + nodeMap[node.Name] = node klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) pods := listOldPodsOnNode(node.Name, getPodsAssignedToNode, podFilter, *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) - for _, pod := range pods { - success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime") - if success { - klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) - } + podsToEvict = append(podsToEvict, pods...) + } - if err != nil { - klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) - break - } + // Should sort Pods so that the oldest can be evicted first + // in the event that PDB or settings such maxNoOfPodsToEvictPer* prevent too much eviction + podutil.SortPodsBasedOnAge(podsToEvict) + + for _, pod := range podsToEvict { + success, err := podEvictor.EvictPod(ctx, pod, nodeMap[pod.Spec.NodeName], "PodLifeTime") + if success { + klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds) } + if err != nil { + klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod)) + break + } } } diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go index 8c08cab62..4b7f21a9a 100644 --- a/pkg/descheduler/strategies/pod_lifetime_test.go +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -139,12 +139,14 @@ func TestPodLifeTime(t *testing.T) { var maxLifeTime uint = 600 testCases := []struct { - description string - strategy api.DeschedulerStrategy - pods []*v1.Pod - nodes []*v1.Node - expectedEvictedPodCount uint - ignorePvcPods bool + description string + strategy api.DeschedulerStrategy + pods []*v1.Pod + nodes []*v1.Node + expectedEvictedPodCount uint + ignorePvcPods bool + maxPodsToEvictPerNode *uint + maxPodsToEvictPerNamespace *uint }{ { description: "Two pods in the `dev` Namespace, 1 is new and 1 very is old. 1 should be evicted.", @@ -264,6 +266,46 @@ func TestPodLifeTime(t *testing.T) { nodes: []*v1.Node{node1}, expectedEvictedPodCount: 0, }, + { + description: "2 Oldest pods should be evicted when maxPodsToEvictPerNode and maxPodsToEvictPerNamespace are not set", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: &api.StrategyParameters{ + PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, + }, + }, + pods: []*v1.Pod{p1, p2, p9}, + nodes: []*v1.Node{node1}, + expectedEvictedPodCount: 2, + maxPodsToEvictPerNode: nil, + maxPodsToEvictPerNamespace: nil, + }, + { + description: "1 Oldest pod should be evicted when maxPodsToEvictPerNamespace is set to 1", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: &api.StrategyParameters{ + PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, + }, + }, + pods: []*v1.Pod{p1, p2, p9}, + nodes: []*v1.Node{node1}, + maxPodsToEvictPerNamespace: func(i uint) *uint { return &i }(1), + expectedEvictedPodCount: 1, + }, + { + description: "1 Oldest pod should be evicted when maxPodsToEvictPerNode is set to 1", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: &api.StrategyParameters{ + PodLifeTime: &api.PodLifeTime{MaxPodLifeTimeSeconds: &maxLifeTime}, + }, + }, + pods: []*v1.Pod{p1, p2, p9}, + nodes: []*v1.Node{node1}, + maxPodsToEvictPerNode: func(i uint) *uint { return &i }(1), + expectedEvictedPodCount: 1, + }, } for _, tc := range testCases { @@ -295,8 +337,8 @@ func TestPodLifeTime(t *testing.T) { fakeClient, policyv1.SchemeGroupVersion.String(), false, - nil, - nil, + tc.maxPodsToEvictPerNode, + tc.maxPodsToEvictPerNamespace, tc.nodes, getPodsAssignedToNode, false, diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ff928cbe8..c060d769a 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -162,6 +162,7 @@ func runPodLifetimeStrategy( priorityClass string, priority *int32, evictCritical bool, + maxPodsToEvictPerNamespace *uint, labelSelector *metav1.LabelSelector, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, ) { @@ -196,7 +197,7 @@ func runPodLifetimeStrategy( evictionPolicyGroupVersion, false, nil, - nil, + maxPodsToEvictPerNamespace, nodes, getPodsAssignedToNode, false, @@ -418,7 +419,7 @@ func TestNamespaceConstraintsInclude(t *testing.T) { t.Logf("set the strategy to delete pods from %v namespace", rc.Namespace) runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{ Include: []string{rc.Namespace}, - }, "", nil, false, nil, getPodsAssignedToNode) + }, "", nil, false, nil, nil, getPodsAssignedToNode) // All pods are supposed to be deleted, wait until all the old pods are deleted if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) { @@ -489,7 +490,7 @@ func TestNamespaceConstraintsExclude(t *testing.T) { t.Logf("set the strategy to delete pods from namespaces except the %v namespace", rc.Namespace) runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, &deschedulerapi.Namespaces{ Exclude: []string{rc.Namespace}, - }, "", nil, false, nil, getPodsAssignedToNode) + }, "", nil, false, nil, nil, getPodsAssignedToNode) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -602,9 +603,9 @@ func testEvictSystemCritical(t *testing.T, isPriorityClass bool) { t.Logf("Existing pods: %v", initialPodNames) if isPriorityClass { - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, true, nil, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, true, nil, nil, getPodsAssignedToNode) } else { - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, true, nil, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, true, nil, nil, getPodsAssignedToNode) } // All pods are supposed to be deleted, wait until all pods in the test namespace are terminating @@ -721,10 +722,10 @@ func testPriority(t *testing.T, isPriorityClass bool) { if isPriorityClass { t.Logf("set the strategy to delete pods with priority lower than priority class %s", highPriorityClass.Name) - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, false, nil, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, highPriorityClass.Name, nil, false, nil, nil, getPodsAssignedToNode) } else { t.Logf("set the strategy to delete pods with priority lower than %d", highPriority) - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, false, nil, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", &highPriority, false, nil, nil, getPodsAssignedToNode) } t.Logf("Waiting 10s") @@ -828,7 +829,7 @@ func TestPodLabelSelector(t *testing.T) { t.Logf("Pods not expected to be evicted: %v, pods expected to be evicted: %v", expectReservePodNames, expectEvictPodNames) t.Logf("set the strategy to delete pods with label test:podlifetime-evict") - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", nil, false, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", nil, false, nil, &metav1.LabelSelector{MatchLabels: map[string]string{"test": "podlifetime-evict"}}, getPodsAssignedToNode) t.Logf("Waiting 10s") time.Sleep(10 * time.Second) @@ -928,7 +929,7 @@ func TestEvictAnnotation(t *testing.T) { t.Logf("Existing pods: %v", initialPodNames) t.Log("Running PodLifetime strategy") - runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", nil, false, nil, getPodsAssignedToNode) + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", nil, false, nil, nil, getPodsAssignedToNode) if err := wait.PollImmediate(5*time.Second, time.Minute, func() (bool, error) { podList, err = clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) @@ -952,6 +953,66 @@ func TestEvictAnnotation(t *testing.T) { } } +func TestPodLifeTimeOldestEvicted(t *testing.T) { + ctx := context.Background() + + clientSet, nodeInformer, getPodsAssignedToNode, stopCh := initializeClient(t) + defer close(stopCh) + + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + + t.Log("Create RC with 1 pod for testing oldest pod getting evicted") + rc := RcByNameContainer("test-rc-pod-lifetime-oldest-evicted", testNamespace.Name, int32(1), map[string]string{"test": "oldest"}, nil, "") + if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil { + t.Errorf("Error creating deployment %v", err) + } + defer deleteRC(ctx, t, clientSet, rc) + + waitForRCPodsRunning(ctx, t, clientSet, rc) + + podList, err := clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + oldestPod := podList.Items[0] + + t.Log("Scale the rs to 5 replicas with the 4 new pods having a more recent creation timestamp") + rc.Spec.Replicas = func(i int32) *int32 { return &i }(5) + rc, err = clientSet.CoreV1().ReplicationControllers(rc.Namespace).Update(ctx, rc, metav1.UpdateOptions{}) + if err != nil { + t.Errorf("Error updating deployment %v", err) + } + waitForRCPodsRunning(ctx, t, clientSet, rc) + + podList, err = clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + + t.Log("Running PodLifetime strategy with maxPodsToEvictPerNamespace=1 to ensure only the oldest pod is evicted") + var maxPodsToEvictPerNamespace uint = 1 + runPodLifetimeStrategy(ctx, t, clientSet, nodeInformer, nil, "", nil, false, &maxPodsToEvictPerNamespace, nil, getPodsAssignedToNode) + t.Log("Finished PodLifetime strategy") + + t.Logf("Wait for terminating pod to disappear") + waitForTerminatingPodsToDisappear(ctx, t, clientSet, rc.Namespace) + + podList, err = clientSet.CoreV1().Pods(rc.Namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rc.Spec.Template.Labels).String()}) + if err != nil { + t.Fatalf("Unable to list pods: %v", err) + } + + for _, pod := range podList.Items { + if pod.GetName() == oldestPod.GetName() { + t.Errorf("The oldest Pod %s was not evicted", oldestPod.GetName()) + } + } +} + func TestDeschedulingInterval(t *testing.T) { ctx := context.Background() clientSet, err := client.CreateClient(os.Getenv("KUBECONFIG"))