diff --git a/README.md b/README.md index 1620d169c..e01f3a6c9 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,8 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory. ## Policy and Strategies Descheduler's policy is configurable and includes strategies that can be enabled or disabled. -Six strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, -`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints` and `RemovePodsHavingTooManyRestarts` +Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, +`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime` are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too. By default, all strategies are enabled. @@ -198,6 +198,21 @@ strategies: includingInitContainers: true ``` +### PodLifeTime + +This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds`. The policy +file should look like: + +```` +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "PodLifeTime": + enabled: true + params: + maxPodLifeTimeSeconds: 86400 +```` + ## Pod Evictions When the descheduler decides to evict pods from a node, it employs the following general mechanism: @@ -265,7 +280,6 @@ Learn how to engage with the Kubernetes community on the [community page](http:/ This roadmap is not in any particular order. 
* Consideration of pod affinity -* Strategy to consider pod life time * Strategy to consider number of pending pods * Integration with cluster autoscaler * Integration with metrics providers for obtaining real load metrics diff --git a/examples/pod-life-time.yml b/examples/pod-life-time.yml new file mode 100644 index 000000000..ad0eb3e08 --- /dev/null +++ b/examples/pod-life-time.yml @@ -0,0 +1,20 @@ +--- +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "LowNodeUtilization": + enabled: false + "RemoveDuplicates": + enabled: false + "RemovePodsViolatingInterPodAntiAffinity": + enabled: false + "RemovePodsViolatingNodeAffinity": + enabled: false + "RemovePodsViolatingNodeTaints": + enabled: false + "RemovePodsHavingTooManyRestarts": + enabled: false + "PodLifeTime": + enabled: true + params: + maxPodLifeTimeSeconds: 604800 # 7 days diff --git a/pkg/api/types.go b/pkg/api/types.go index 48d5e0376..34538dcfe 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -17,7 +17,7 @@ limitations under the License. package api import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -49,6 +49,7 @@ type StrategyParameters struct { NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds NodeAffinityType []string PodsHavingTooManyRestarts *PodsHavingTooManyRestarts + MaxPodLifeTimeSeconds *uint } type Percentage float64 diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index 5ba89673c..c03c2fe42 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -17,7 +17,7 @@ limitations under the License. 
package v1alpha1 import ( - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -49,6 +49,7 @@ type StrategyParameters struct { NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"` NodeAffinityType []string `json:"nodeAffinityType,omitempty"` PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"` + MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"` } type Percentage float64 diff --git a/pkg/api/v1alpha1/zz_generated.conversion.go b/pkg/api/v1alpha1/zz_generated.conversion.go index efd67877c..22ea30d9c 100644 --- a/pkg/api/v1alpha1/zz_generated.conversion.go +++ b/pkg/api/v1alpha1/zz_generated.conversion.go @@ -186,6 +186,7 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds)) out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType)) out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts)) + out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds)) return nil } @@ -198,6 +199,7 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds)) out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType)) out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts)) + out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds)) return nil } diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 6563cef6c..035395163 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ 
b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -181,6 +181,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) { *out = new(PodsHavingTooManyRestarts) **out = **in } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } return } diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index 79a47e1ae..857c32738 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -181,6 +181,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) { *out = new(PodsHavingTooManyRestarts) **out = **in } + if in.MaxPodLifeTimeSeconds != nil { + in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds + *out = new(uint) + **out = **in + } return } diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index a755e92ba..748fdfe9e 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -74,6 +74,7 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy * "RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity, "RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints, "RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts, + "PodLifeTime": strategies.PodLifeTime, } wait.Until(func() { diff --git a/pkg/descheduler/strategies/pod_lifetime.go b/pkg/descheduler/strategies/pod_lifetime.go new file mode 100644 index 000000000..ad1f5d9d2 --- /dev/null +++ b/pkg/descheduler/strategies/pod_lifetime.go @@ -0,0 +1,69 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package strategies + +import ( + v1 "k8s.io/api/core/v1" + v1meta "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog" + + "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/evictions" + podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" +) + +// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago. +func PodLifeTime(client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) { + if strategy.Params.MaxPodLifeTimeSeconds == nil { + klog.V(1).Infof("MaxPodLifeTimeSeconds not set") + return + } + + for _, node := range nodes { + klog.V(1).Infof("Processing node: %#v", node.Name) + pods := listOldPodsOnNode(client, node, *strategy.Params.MaxPodLifeTimeSeconds, evictLocalStoragePods) + for _, pod := range pods { + success, err := podEvictor.EvictPod(pod, node) + if success { + klog.V(1).Infof("Evicted pod: %#v\n because it was created more than %v seconds ago", pod.Name, *strategy.Params.MaxPodLifeTimeSeconds) + } + + if err != nil { + klog.Errorf("Error evicting pod: (%#v)", err) + break + } + } + } +} + +func listOldPodsOnNode(client clientset.Interface, node *v1.Node, maxAge uint, evictLocalStoragePods bool) []*v1.Pod { + pods, err := podutil.ListEvictablePodsOnNode(client, node, evictLocalStoragePods) + if err != nil { + return nil + } + + var oldPods []*v1.Pod + for _, pod := range pods { + podAgeSeconds := 
uint(v1meta.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds()) + if podAgeSeconds > maxAge { + oldPods = append(oldPods, pod) + } + } + + return oldPods +} diff --git a/pkg/descheduler/strategies/pod_lifetime_test.go b/pkg/descheduler/strategies/pod_lifetime_test.go new file mode 100644 index 000000000..062943e76 --- /dev/null +++ b/pkg/descheduler/strategies/pod_lifetime_test.go @@ -0,0 +1,167 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package strategies + +import ( + "testing" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + core "k8s.io/client-go/testing" + "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/evictions" + "sigs.k8s.io/descheduler/test" +) + +func TestPodLifeTime(t *testing.T) { + node := test.BuildTestNode("n1", 2000, 3000, 10, nil) + olderPodCreationTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)) + newerPodCreationTime := metav1.NewTime(time.Now()) + + // Setup pods, one should be evicted + p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil) + p1.Namespace = "dev" + p1.ObjectMeta.CreationTimestamp = newerPodCreationTime + p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil) + p2.Namespace = "dev" + p2.ObjectMeta.CreationTimestamp = olderPodCreationTime + + ownerRef1 := test.GetReplicaSetOwnerRefList() + p1.ObjectMeta.OwnerReferences = 
ownerRef1 + p2.ObjectMeta.OwnerReferences = ownerRef1 + + // Setup pods, zero should be evicted + p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil) + p3.Namespace = "dev" + p3.ObjectMeta.CreationTimestamp = newerPodCreationTime + p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil) + p4.Namespace = "dev" + p4.ObjectMeta.CreationTimestamp = newerPodCreationTime + + ownerRef2 := test.GetReplicaSetOwnerRefList() + p3.ObjectMeta.OwnerReferences = ownerRef2 + p4.ObjectMeta.OwnerReferences = ownerRef2 + + // Setup pods, one should be evicted + p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil) + p5.Namespace = "dev" + p5.ObjectMeta.CreationTimestamp = newerPodCreationTime + p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil) + p6.Namespace = "dev" + p6.ObjectMeta.CreationTimestamp = metav1.NewTime(time.Now().Add(-time.Second * 605)) + + ownerRef3 := test.GetReplicaSetOwnerRefList() + p5.ObjectMeta.OwnerReferences = ownerRef3 + p6.ObjectMeta.OwnerReferences = ownerRef3 + + // Setup pods, zero should be evicted + p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil) + p7.Namespace = "dev" + p7.ObjectMeta.CreationTimestamp = newerPodCreationTime + p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil) + p8.Namespace = "dev" + p8.ObjectMeta.CreationTimestamp = metav1.NewTime(time.Now().Add(-time.Second * 595)) + + ownerRef4 := test.GetReplicaSetOwnerRefList() + p7.ObjectMeta.OwnerReferences = ownerRef4 + p8.ObjectMeta.OwnerReferences = ownerRef4 + + var maxLifeTime uint = 600 + testCases := []struct { + description string + strategy api.DeschedulerStrategy + maxPodsToEvict int + pods []v1.Pod + expectedEvictedPodCount int + }{ + { + description: "Two pods in the `dev` Namespace, 1 is new and 1 is very old. 
1 should be evicted.", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + MaxPodLifeTimeSeconds: &maxLifeTime, + }, + }, + maxPodsToEvict: 5, + pods: []v1.Pod{*p1, *p2}, + expectedEvictedPodCount: 1, + }, + { + description: "Two pods in the `dev` Namespace, 2 are new and 0 are old. 0 should be evicted.", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + MaxPodLifeTimeSeconds: &maxLifeTime, + }, + }, + maxPodsToEvict: 5, + pods: []v1.Pod{*p3, *p4}, + expectedEvictedPodCount: 0, + }, + { + description: "Two pods in the `dev` Namespace, 1 created 605 seconds ago. 1 should be evicted.", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + MaxPodLifeTimeSeconds: &maxLifeTime, + }, + }, + maxPodsToEvict: 5, + pods: []v1.Pod{*p5, *p6}, + expectedEvictedPodCount: 1, + }, + { + description: "Two pods in the `dev` Namespace, 1 created 595 seconds ago. 0 should be evicted.", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + MaxPodLifeTimeSeconds: &maxLifeTime, + }, + }, + maxPodsToEvict: 5, + pods: []v1.Pod{*p7, *p8}, + expectedEvictedPodCount: 0, + }, + } + + for _, tc := range testCases { + fakeClient := &fake.Clientset{} + fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, &v1.PodList{Items: tc.pods}, nil + }) + fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) { + return true, node, nil + }) + podEvictor := evictions.NewPodEvictor( + fakeClient, + "v1", + false, + tc.maxPodsToEvict, + []*v1.Node{node}, + ) + + PodLifeTime(fakeClient, tc.strategy, []*v1.Node{node}, false, podEvictor) + podsEvicted := podEvictor.TotalEvicted() + if podsEvicted != tc.expectedEvictedPodCount { + t.Errorf("Test error for description: %s. 
Expected evicted pods count %v, got %v", tc.description, tc.expectedEvictedPodCount, podsEvicted) + } + } + +}