1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Consider pod image in duplicates strategy

This increases the specificity of the RemoveDuplicates strategy by
removing pods which not only have the same owner, but who also
must have the same list of container images. This also adds a
parameter, `ExcludeOwnerKinds` to the RemoveDuplicates strategy
which accepts a list of Kinds. If a pod has any of these Kinds as
an owner, that pod is not considered for eviction.
This commit is contained in:
Mike Dame
2020-05-01 14:28:44 -04:00
parent 04efe65f90
commit c6ff87dbd6
8 changed files with 228 additions and 26 deletions

View File

@@ -66,15 +66,21 @@ Replication Controller (RC), Deployment, or Job running on the same node. If the
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down due to whatever reasons, and pods on them were moved to other nodes leading to
more than one pod associated with a RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods. Currently, there are no
parameters associated with this strategy. To disable this strategy, the policy should look like:
are ready again, this strategy could be enabled to evict those duplicate pods.
It provides one optional parameter, `ExcludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveDuplicates":
enabled: false
enabled: true
params:
removeDuplicates:
excludeOwnerKinds:
- "ReplicaSet"
```
### LowNodeUtilization

View File

@@ -50,6 +50,7 @@ type StrategyParameters struct {
NodeAffinityType []string
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
MaxPodLifeTimeSeconds *uint
RemoveDuplicates *RemoveDuplicates
}
type Percentage float64
@@ -65,3 +66,7 @@ type PodsHavingTooManyRestarts struct {
PodRestartThreshold int32
IncludingInitContainers bool
}
type RemoveDuplicates struct {
ExcludeOwnerKinds []string
}

View File

@@ -50,6 +50,7 @@ type StrategyParameters struct {
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
}
type Percentage float64
@@ -65,3 +66,7 @@ type PodsHavingTooManyRestarts struct {
PodRestartThreshold int32 `json:"podRestartThreshold,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
}
type RemoveDuplicates struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
}

View File

@@ -75,6 +75,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*RemoveDuplicates)(nil), (*api.RemoveDuplicates)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_RemoveDuplicates_To_api_RemoveDuplicates(a.(*RemoveDuplicates), b.(*api.RemoveDuplicates), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.RemoveDuplicates)(nil), (*RemoveDuplicates)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_RemoveDuplicates_To_v1alpha1_RemoveDuplicates(a.(*api.RemoveDuplicates), b.(*RemoveDuplicates), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*StrategyParameters)(nil), (*api.StrategyParameters)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_StrategyParameters_To_api_StrategyParameters(a.(*StrategyParameters), b.(*api.StrategyParameters), scope)
}); err != nil {
@@ -182,11 +192,32 @@ func Convert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts
return autoConvert_api_PodsHavingTooManyRestarts_To_v1alpha1_PodsHavingTooManyRestarts(in, out, s)
}
func autoConvert_v1alpha1_RemoveDuplicates_To_api_RemoveDuplicates(in *RemoveDuplicates, out *api.RemoveDuplicates, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
return nil
}
// Convert_v1alpha1_RemoveDuplicates_To_api_RemoveDuplicates is an autogenerated conversion function.
func Convert_v1alpha1_RemoveDuplicates_To_api_RemoveDuplicates(in *RemoveDuplicates, out *api.RemoveDuplicates, s conversion.Scope) error {
return autoConvert_v1alpha1_RemoveDuplicates_To_api_RemoveDuplicates(in, out, s)
}
func autoConvert_api_RemoveDuplicates_To_v1alpha1_RemoveDuplicates(in *api.RemoveDuplicates, out *RemoveDuplicates, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
return nil
}
// Convert_api_RemoveDuplicates_To_v1alpha1_RemoveDuplicates is an autogenerated conversion function.
func Convert_api_RemoveDuplicates_To_v1alpha1_RemoveDuplicates(in *api.RemoveDuplicates, out *RemoveDuplicates, s conversion.Scope) error {
return autoConvert_api_RemoveDuplicates_To_v1alpha1_RemoveDuplicates(in, out, s)
}
func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *StrategyParameters, out *api.StrategyParameters, s conversion.Scope) error {
out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
return nil
}
@@ -200,6 +231,7 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
return nil
}

View File

@@ -119,6 +119,27 @@ func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemoveDuplicates) DeepCopyInto(out *RemoveDuplicates) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveDuplicates.
func (in *RemoveDuplicates) DeepCopy() *RemoveDuplicates {
if in == nil {
return nil
}
out := new(RemoveDuplicates)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
{
@@ -186,6 +207,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(uint)
**out = **in
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -119,6 +119,27 @@ func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemoveDuplicates) DeepCopyInto(out *RemoveDuplicates) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveDuplicates.
func (in *RemoveDuplicates) DeepCopy() *RemoveDuplicates {
if in == nil {
return nil
}
out := new(RemoveDuplicates)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in ResourceThresholds) DeepCopyInto(out *ResourceThresholds) {
{
@@ -186,6 +207,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(uint)
**out = **in
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -18,9 +18,13 @@ package strategies
import (
"context"
"reflect"
"sort"
"strings"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
@@ -31,7 +35,8 @@ import (
// RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
// A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
// namespace. As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
// namespace, and have at least one container with the same image.
// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
func RemoveDuplicatePods(
ctx context.Context,
client clientset.Interface,
@@ -42,41 +47,83 @@ func RemoveDuplicatePods(
) {
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
dpm := listDuplicatePodsOnANode(ctx, client, node, evictLocalStoragePods)
for creator, pods := range dpm {
if len(pods) > 1 {
klog.V(1).Infof("%#v", creator)
// i = 0 does not evict the first pod
for i := 1; i < len(pods); i++ {
if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil {
duplicatePods := listDuplicatePodsOnANode(ctx, client, node, strategy, evictLocalStoragePods)
for _, pod := range duplicatePods {
if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
}
}
}
}
}
//type creator string
type duplicatePodsMap map[string][]*v1.Pod
// listDuplicatePodsOnANode lists duplicate pods on a given node.
func listDuplicatePodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, evictLocalStoragePods bool) duplicatePodsMap {
// It checks for pods which have the same owner and have at least 1 container with the same image spec
func listDuplicatePodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, strategy api.DeschedulerStrategy, evictLocalStoragePods bool) []*v1.Pod {
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
if err != nil {
return nil
}
dpm := duplicatePodsMap{}
// Ignoring the error here as in the ListDuplicatePodsOnNode function we call ListEvictablePodsOnNode which checks for error.
duplicatePods := make([]*v1.Pod, 0, len(pods))
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
// This list of keys representing a single pod is then sorted alphabetically.
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
// - The 2 pods have the exact same ownerrefs
// - The 2 pods have the exact same container images
//
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
// If not, then we add this pod's list to the list of lists for that key.
duplicateKeysMap := map[string][][]string{}
for _, pod := range pods {
ownerRefList := podutil.OwnerRef(pod)
if hasExcludedOwnerRefKind(ownerRefList, strategy) {
continue
}
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
for _, ownerRef := range ownerRefList {
for _, container := range pod.Spec.Containers {
// Namespace/Kind/Name should be unique for the cluster.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name}, "/")
dpm[s] = append(dpm[s], pod)
// We also consider the image, as 2 pods could have the same owner but serve different purposes
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/")
podContainerKeys = append(podContainerKeys, s)
}
}
return dpm
sort.Strings(podContainerKeys)
// If there have been any other pods with the same first "key", look through all the lists to see if any match
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
for _, keys := range existing {
if reflect.DeepEqual(keys, podContainerKeys) {
duplicatePods = append(duplicatePods, pod)
break
}
// Found no matches, add this list of keys to the list of lists that have the same first key
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
}
} else {
// This is the first pod we've seen that has this first "key" entry
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
}
}
return duplicatePods
}
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
if strategy.Params.RemoveDuplicates == nil {
return false
}
exclude := sets.NewString(strategy.Params.RemoveDuplicates.ExcludeOwnerKinds...)
for _, owner := range ownerRefs {
if exclude.Has(owner.Kind) {
return true
}
}
return false
}

View File

@@ -52,6 +52,14 @@ func TestFindDuplicatePods(t *testing.T) {
p9.Namespace = "test"
p10 := test.BuildTestPod("p10", 100, 0, node.Name, nil)
p10.Namespace = "test"
p11 := test.BuildTestPod("p11", 100, 0, node.Name, nil)
p11.Namespace = "different-images"
p12 := test.BuildTestPod("p12", 100, 0, node.Name, nil)
p12.Namespace = "different-images"
p13 := test.BuildTestPod("p13", 100, 0, node.Name, nil)
p13.Namespace = "different-images"
p14 := test.BuildTestPod("p14", 100, 0, node.Name, nil)
p14.Namespace = "different-images"
// ### Evictable Pods ###
@@ -92,41 +100,88 @@ func TestFindDuplicatePods(t *testing.T) {
priority := utils.SystemCriticalPriority
p7.Spec.Priority = &priority
// Same owners, but different images
p11.Spec.Containers[0].Image = "foo"
p11.ObjectMeta.OwnerReferences = ownerRef1
p12.Spec.Containers[0].Image = "bar"
p12.ObjectMeta.OwnerReferences = ownerRef1
// Multiple containers
p13.ObjectMeta.OwnerReferences = ownerRef1
p13.Spec.Containers = append(p13.Spec.Containers, v1.Container{
Name: "foo",
Image: "foo",
})
testCases := []struct {
description string
maxPodsToEvict int
pods []v1.Pod
expectedEvictedPodCount int
strategy api.DeschedulerStrategy
}{
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvict: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
maxPodsToEvict: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
},
{
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvict: 5,
pods: []v1.Pod{*p8, *p9, *p10},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
maxPodsToEvict: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.",
maxPodsToEvict: 2,
pods: []v1.Pod{*p4, *p5, *p6, *p7},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Test all Pods: 4 should be evicted.",
maxPodsToEvict: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with the same owner but different images should not be evicted",
maxPodsToEvict: 5,
pods: []v1.Pod{*p11, *p12},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with multiple containers should not match themselves",
maxPodsToEvict: 5,
pods: []v1.Pod{*p13},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with matching ownerrefs and at not all matching image should not trigger an eviction",
maxPodsToEvict: 5,
pods: []v1.Pod{*p11, *p13},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
}
@@ -146,7 +201,7 @@ func TestFindDuplicatePods(t *testing.T) {
[]*v1.Node{node},
)
RemoveDuplicatePods(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, false, podEvictor)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, false, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)