From 423ee35846a85a7cb7b9ab48bfa395141afdd7df Mon Sep 17 00:00:00 2001 From: Sean Malloy Date: Thu, 23 Apr 2020 23:41:37 -0500 Subject: [PATCH] Add New PodLifeTime Strategy The new PodLifeTime descheduler strategy can be used to evict pods that were created more than the configured number of seconds ago. In the below example pods created more than 24 hours ago will be evicted. ```` apiVersion: "descheduler/v1alpha1" kind: "DeschedulerPolicy" strategies: "PodLifeTime": enabled: true params: maxPodLifeTimeSeconds: 86400 ```` --- README.md | 20 ++- examples/pod-life-time.yml | 20 +++ pkg/api/types.go | 3 +- pkg/api/v1alpha1/types.go | 3 +- pkg/api/v1alpha1/zz_generated.conversion.go | 2 + pkg/api/v1alpha1/zz_generated.deepcopy.go | 5 + pkg/api/zz_generated.deepcopy.go | 5 + pkg/descheduler/descheduler.go | 1 + pkg/descheduler/strategies/pod_lifetime.go | 69 ++++++++ .../strategies/pod_lifetime_test.go | 167 ++++++++++++++++++ 10 files changed, 290 insertions(+), 5 deletions(-) create mode 100644 examples/pod-life-time.yml create mode 100644 pkg/descheduler/strategies/pod_lifetime.go create mode 100644 pkg/descheduler/strategies/pod_lifetime_test.go diff --git a/README.md b/README.md index 1620d169c..e01f3a6c9 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,8 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory. ## Policy and Strategies Descheduler's policy is configurable and includes strategies that can be enabled or disabled. -Six strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, -`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints` and `RemovePodsHavingTooManyRestarts` +Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, +`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime` are currently implemented. 
As part of the policy, the parameters associated with the strategies can be configured too. By default, all strategies are enabled. @@ -198,6 +198,21 @@ strategies: includingInitContainers: true ``` +### PodLifeTime + +This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds` The policy +file should look like: + +```` +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "PodLifeTime": + enabled: true + params: + maxPodLifeTimeSeconds: 86400 +```` + ## Pod Evictions When the descheduler decides to evict pods from a node, it employs the following general mechanism: @@ -265,7 +280,6 @@ Learn how to engage with the Kubernetes community on the [community page](http:/ This roadmap is not in any particular order. * Consideration of pod affinity -* Strategy to consider pod life time * Strategy to consider number of pending pods * Integration with cluster autoscaler * Integration with metrics providers for obtaining real load metrics diff --git a/examples/pod-life-time.yml b/examples/pod-life-time.yml new file mode 100644 index 000000000..ad0eb3e08 --- /dev/null +++ b/examples/pod-life-time.yml @@ -0,0 +1,20 @@ +--- +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "LowNodeUtilization": + enabled: false + "RemoveDuplicates": + enabled: false + "RemovePodsViolatingInterPodAntiAffinity": + enabled: false + "RemovePodsViolatingNodeAffinity": + enabled: false + "RemovePodsViolatingNodeTaints": + enabled: false + "RemovePodsHavingTooManyRestarts": + enabled: false + "PodLifeTime": + enabled: true + params: + maxPodLifeTimeSeconds: 604800 # 7 days diff --git a/pkg/api/types.go b/pkg/api/types.go index 48d5e0376..34538dcfe 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -17,7 +17,7 @@ limitations under the License. 
package api import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -49,6 +49,7 @@ type StrategyParameters struct { NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds NodeAffinityType []string PodsHavingTooManyRestarts *PodsHavingTooManyRestarts + MaxPodLifeTimeSeconds *uint } type Percentage float64 diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index 5ba89673c..c03c2fe42 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -17,7 +17,7 @@ limitations under the License. package v1alpha1 import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -49,6 +49,7 @@ type StrategyParameters struct { NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"` NodeAffinityType []string `json:"nodeAffinityType,omitempty"` PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"` + MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"` } type Percentage float64 diff --git a/pkg/api/v1alpha1/zz_generated.conversion.go b/pkg/api/v1alpha1/zz_generated.conversion.go index efd67877c..22ea30d9c 100644 --- a/pkg/api/v1alpha1/zz_generated.conversion.go +++ b/pkg/api/v1alpha1/zz_generated.conversion.go @@ -186,6 +186,7 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds)) out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType)) out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts)) + out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds)) return nil } @@ -198,6 +199,7 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S 
out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds)) out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType)) out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts)) + out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds)) return nil } diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 6563cef6c..035395163 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -181,6 +181,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) { *out = new(PodsHavingTooManyRestarts) **out = **in } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } return } diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index 79a47e1ae..857c32738 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -181,6 +181,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) { *out = new(PodsHavingTooManyRestarts) **out = **in } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } return } diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index a755e92ba..748fdfe9e 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -74,6 +74,7 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy * "RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity, "RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints, "RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts, + "PodLifeTime": strategies.PodLifeTime, } wait.Until(func() 
{ diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go new file mode 100644 index 000000000..ad1f5d9d2 --- /dev/null +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -0,0 +1,69 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package strategies + +import ( + v1 "k8s.io/api/core/v1" + v1meta "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog" + + "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/evictions" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" +) + +// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. 
+func PodLifeTime(client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
+	if strategy.Params.MaxPodLifeTimeSeconds == nil {
+		klog.V(1).Infof("MaxPodLifeTimeSeconds not set")
+		return
+	}
+
+	for _, node := range nodes {
+		klog.V(1).Infof("Processing node: %#v", node.Name)
+		pods := listOldPodsOnNode(client, node, *strategy.Params.MaxPodLifeTimeSeconds, evictLocalStoragePods)
+		for _, pod := range pods {
+			success, err := podEvictor.EvictPod(pod, node)
+			if success {
+				klog.V(1).Infof("Evicted pod: %#v\n because it was created more than %v seconds ago", pod.Name, *strategy.Params.MaxPodLifeTimeSeconds)
+			}
+
+			if err != nil {
+				klog.Errorf("Error evicting pod: (%#v)", err)
+				break
+			}
+		}
+	}
+}
+
+func listOldPodsOnNode(client clientset.Interface, node *v1.Node, maxAge uint, evictLocalStoragePods bool) []*v1.Pod {
+	pods, err := podutil.ListEvictablePodsOnNode(client, node, evictLocalStoragePods)
+	if err != nil {
+		return nil
+	}
+	// Compare ages as float64 seconds: converting a negative duration to uint is
+	var oldPods []*v1.Pod
+	for _, pod := range pods {
+		// implementation-defined, which could wrongly evict pods whose creation
+		// timestamp lies in the future (e.g. due to clock skew).
+		podAgeSeconds := v1meta.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()
+		if podAgeSeconds > float64(maxAge) {
+			oldPods = append(oldPods, pod)
+		}
+	}
+
+	return oldPods
+}
diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go
new file mode 100644
index 000000000..062943e76
--- /dev/null
+++ b/pkg/descheduler/strategies/pod_lifetime_test.go
@@ -0,0 +1,167 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package strategies
+
+import (
+	"testing"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+	core "k8s.io/client-go/testing"
+	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
+	"sigs.k8s.io/descheduler/test"
+)
+
+func TestPodLifeTime(t *testing.T) {
+	node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
+	olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
+	newerPodCreationTime := metav1.NewTime(time.Now())
+
+	// Setup pods, one should be evicted
+	p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
+	p1.Namespace = "dev"
+	p1.ObjectMeta.CreationTimestamp = newerPodCreationTime
+	p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
+	p2.Namespace = "dev"
+	p2.ObjectMeta.CreationTimestamp = olderPodCreationTime
+
+	ownerRef1 := test.GetReplicaSetOwnerRefList()
+	p1.ObjectMeta.OwnerReferences = ownerRef1
+	p2.ObjectMeta.OwnerReferences = ownerRef1
+
+	// Setup pods, zero should be evicted
+	p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
+	p3.Namespace = "dev"
+	p3.ObjectMeta.CreationTimestamp = newerPodCreationTime
+	p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
+	p4.Namespace = "dev"
+	p4.ObjectMeta.CreationTimestamp = newerPodCreationTime
+
+	ownerRef2 := test.GetReplicaSetOwnerRefList()
+	p3.ObjectMeta.OwnerReferences = ownerRef2
+	p4.ObjectMeta.OwnerReferences = ownerRef2
+
+	// Setup pods, one should be evicted
+	p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
+	p5.Namespace = "dev"
+	p5.ObjectMeta.CreationTimestamp = newerPodCreationTime
+	p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
+	p6.Namespace = "dev"
+	p6.ObjectMeta.CreationTimestamp = metav1.NewTime(time.Now().Add(-time.Second * 605))
+
+	ownerRef3 := test.GetReplicaSetOwnerRefList()
+	p5.ObjectMeta.OwnerReferences = ownerRef3
+	p6.ObjectMeta.OwnerReferences = ownerRef3
+
+	// Setup pods, zero should be evicted
+	p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
+	p7.Namespace = "dev"
+	p7.ObjectMeta.CreationTimestamp = newerPodCreationTime
+	p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil)
+	p8.Namespace = "dev"
+	p8.ObjectMeta.CreationTimestamp = metav1.NewTime(time.Now().Add(-time.Second * 595))
+
+	ownerRef4 := test.GetReplicaSetOwnerRefList()
+	p7.ObjectMeta.OwnerReferences = ownerRef4
+	p8.ObjectMeta.OwnerReferences = ownerRef4
+
+	var maxLifeTime uint = 600
+	testCases := []struct {
+		description             string
+		strategy                api.DeschedulerStrategy
+		maxPodsToEvict          int
+		pods                    []v1.Pod
+		expectedEvictedPodCount int
+	}{
+		{
+			description: "Two pods in the `dev` Namespace, 1 is new and 1 is very old. 1 should be evicted.",
+			strategy: api.DeschedulerStrategy{
+				Enabled: true,
+				Params: api.StrategyParameters{
+					MaxPodLifeTimeSeconds: &maxLifeTime,
+				},
+			},
+			maxPodsToEvict:          5,
+			pods:                    []v1.Pod{*p1, *p2},
+			expectedEvictedPodCount: 1,
+		},
+		{
+			description: "Two pods in the `dev` Namespace, 2 are new and 0 are old. 0 should be evicted.",
+			strategy: api.DeschedulerStrategy{
+				Enabled: true,
+				Params: api.StrategyParameters{
+					MaxPodLifeTimeSeconds: &maxLifeTime,
+				},
+			},
+			maxPodsToEvict:          5,
+			pods:                    []v1.Pod{*p3, *p4},
+			expectedEvictedPodCount: 0,
+		},
+		{
+			description: "Two pods in the `dev` Namespace, 1 created 605 seconds ago. 1 should be evicted.",
+			strategy: api.DeschedulerStrategy{
+				Enabled: true,
+				Params: api.StrategyParameters{
+					MaxPodLifeTimeSeconds: &maxLifeTime,
+				},
+			},
+			maxPodsToEvict:          5,
+			pods:                    []v1.Pod{*p5, *p6},
+			expectedEvictedPodCount: 1,
+		},
+		{
+			description: "Two pods in the `dev` Namespace, 1 created 595 seconds ago. 0 should be evicted.",
+			strategy: api.DeschedulerStrategy{
+				Enabled: true,
+				Params: api.StrategyParameters{
+					MaxPodLifeTimeSeconds: &maxLifeTime,
+				},
+			},
+			maxPodsToEvict:          5,
+			pods:                    []v1.Pod{*p7, *p8},
+			expectedEvictedPodCount: 0,
+		},
+	}
+
+	for _, tc := range testCases {
+		fakeClient := &fake.Clientset{}
+		fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
+			return true, &v1.PodList{Items: tc.pods}, nil
+		})
+		fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
+			return true, node, nil
+		})
+		podEvictor := evictions.NewPodEvictor(
+			fakeClient,
+			"v1",
+			false,
+			tc.maxPodsToEvict,
+			[]*v1.Node{node},
+		)
+
+		PodLifeTime(fakeClient, tc.strategy, []*v1.Node{node}, false, podEvictor)
+		podsEvicted := podEvictor.TotalEvicted()
+		if podsEvicted != tc.expectedEvictedPodCount {
+			t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", tc.description, tc.expectedEvictedPodCount, podsEvicted)
+		}
+	}
+
+}