Mirror of https://github.com/kubernetes-sigs/descheduler.git, synced 2026-01-26 13:29:11 +01:00
bring removeduplicates to plugin
pkg/framework/plugins/removeduplicates/removeduplicates.go (new file, 300 lines)
@@ -0,0 +1,300 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package removeduplicates

import (
	"context"
	"fmt"
	"math"
	"reflect"
	"sort"
	"strings"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"

	"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/pkg/framework"
	"sigs.k8s.io/descheduler/pkg/utils"
)
const PluginName = "RemoveDuplicates"

// RemoveDuplicates evicts duplicate pods from a node. A pod is considered a
// duplicate of another if both come from the same creator and kind, are
// within the same namespace, and have at least one container with the same
// image.
// As of now, this plugin won't evict daemonset pods, mirror pods, critical
// pods, or pods with local storage.
type RemoveDuplicates struct {
	handle    framework.Handle
	args      *componentconfig.RemoveDuplicatesArgs
	podFilter podutil.FilterFunc
}

var _ framework.Plugin = &RemoveDuplicates{}
var _ framework.BalancePlugin = &RemoveDuplicates{}

type podOwner struct {
	namespace, kind, name string
	imagesHash            string
}
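// Illustrative note (not part of the original source): imagesHash is the
// pod's sorted container images joined with "#" (see Balance below). For
// instance, a pod running images ["redis:7", "nginx:1.21"] would get
//
//	imagesHash == "nginx:1.21#redis:7"
//
// so two pods of the same owner only share a podOwner key when they run the
// exact same set of images.
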
// New builds the plugin from its arguments while passing a handle.
func New(args runtime.Object, handle framework.Handle) (framework.Plugin, error) {
	removeDuplicatesArgs, ok := args.(*componentconfig.RemoveDuplicatesArgs)
	if !ok {
		return nil, fmt.Errorf("want args to be of type RemoveDuplicatesArgs, got %T", args)
	}

	var includedNamespaces, excludedNamespaces sets.String
	if removeDuplicatesArgs.Namespaces != nil {
		includedNamespaces = sets.NewString(removeDuplicatesArgs.Namespaces.Include...)
		excludedNamespaces = sets.NewString(removeDuplicatesArgs.Namespaces.Exclude...)
	}

	podFilter, err := podutil.NewOptions().
		WithFilter(handle.Evictor().Filter).
		WithNamespaces(includedNamespaces).
		WithoutNamespaces(excludedNamespaces).
		BuildFilterFunc()
	if err != nil {
		return nil, fmt.Errorf("error initializing pod filter function: %v", err)
	}

	return &RemoveDuplicates{
		handle:    handle,
		args:      removeDuplicatesArgs,
		podFilter: podFilter,
	}, nil
}
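// Usage sketch (illustrative, not part of this file): the descheduler's
// plugin registry is expected to call New with arguments decoded from the
// policy. The field values and the handle below are hypothetical:
//
//	args := &componentconfig.RemoveDuplicatesArgs{
//		ExcludeOwnerKinds: []string{"ReplicaSet"},
//	}
//	plugin, err := New(args, handle) // handle supplied by the framework
//	if err != nil {
//		// handle construction error
//	}
//	balancePlugin := plugin.(framework.BalancePlugin)
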
// Name retrieves the plugin name.
func (r *RemoveDuplicates) Name() string {
	return PluginName
}
// Balance extension point implementation for the plugin.
func (r *RemoveDuplicates) Balance(ctx context.Context, nodes []*v1.Node) *framework.Status {
	duplicatePods := make(map[podOwner]map[string][]*v1.Pod)
	ownerKeyOccurence := make(map[podOwner]int32)
	nodeCount := 0
	nodeMap := make(map[string]*v1.Node)

	for _, node := range nodes {
		klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
		pods, err := podutil.ListPodsOnANode(node.Name, r.handle.GetPodsAssignedToNodeFunc(), r.podFilter)
		if err != nil {
			klog.ErrorS(err, "Error listing evictable pods on node", "node", klog.KObj(node))
			continue
		}
		nodeMap[node.Name] = node
		nodeCount++
		// Each pod has a list of owners and a list of containers, and each container has one image spec.
		// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
		// All of those mappings together make a list of "key" strings that essentially represent that pod's uniqueness.
		// This list of keys representing a single pod is then sorted alphabetically.
		// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
		//   - the two pods have the exact same ownerrefs
		//   - the two pods have the exact same container images
		//
		// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D slice of all the other lists where that is the first key
		// (since we sort each pod's list, we only need to key the map on the first entry in each list; any pod that doesn't have
		// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
		// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
		// If not, then we add this pod's list to the list of lists for that key.
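		//
		// Worked example (hypothetical pod, added for illustration): a pod in
		// namespace "ns1" owned by ReplicaSet "rs-1" and running containers
		// with images "nginx:1.21" and "redis:7" produces the sorted key list
		//
		//	["ns1/ReplicaSet/rs-1/nginx:1.21", "ns1/ReplicaSet/rs-1/redis:7"]
		//
		// Any other pod whose sorted list is exactly this one is recorded as
		// a duplicate below.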
		duplicateKeysMap := map[string][][]string{}
		for _, pod := range pods {
			ownerRefList := podutil.OwnerRef(pod)

			if len(ownerRefList) == 0 || hasExcludedOwnerRefKind(ownerRefList, r.args.ExcludeOwnerKinds) {
				continue
			}
			podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
			imageList := []string{}
			for _, container := range pod.Spec.Containers {
				imageList = append(imageList, container.Image)
			}
			sort.Strings(imageList)
			imagesHash := strings.Join(imageList, "#")
			for _, ownerRef := range ownerRefList {
				ownerKey := podOwner{
					namespace:  pod.ObjectMeta.Namespace,
					kind:       ownerRef.Kind,
					name:       ownerRef.Name,
					imagesHash: imagesHash,
				}
				ownerKeyOccurence[ownerKey]++
				for _, image := range imageList {
					// Namespace/Kind/Name should be unique for the cluster.
					// We also consider the image, as two pods could have the same owner but serve different purposes,
					// so any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
					s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, image}, "/")
					podContainerKeys = append(podContainerKeys, s)
				}
			}
			sort.Strings(podContainerKeys)

			// If there have been any other pods with the same first "key", look through all the lists to see if any match.
			if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
				matched := false
				for _, keys := range existing {
					if reflect.DeepEqual(keys, podContainerKeys) {
						matched = true
						klog.V(3).InfoS("Duplicate found", "pod", klog.KObj(pod))
						for _, ownerRef := range ownerRefList {
							ownerKey := podOwner{
								namespace:  pod.ObjectMeta.Namespace,
								kind:       ownerRef.Kind,
								name:       ownerRef.Name,
								imagesHash: imagesHash,
							}
							if _, ok := duplicatePods[ownerKey]; !ok {
								duplicatePods[ownerKey] = make(map[string][]*v1.Pod)
							}
							duplicatePods[ownerKey][node.Name] = append(duplicatePods[ownerKey][node.Name], pod)
						}
						break
					}
				}
				if !matched {
					// Found no matches, so add this list of keys to the list of lists that have the same first key.
					duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
				}
			} else {
				// This is the first pod we've seen that has this first "key" entry.
				duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
			}
		}
	}

	// 1. How many pods can be evicted to respect uniform placement of pods among viable nodes?
	for ownerKey, podNodes := range duplicatePods {
		targetNodes := getTargetNodes(podNodes, nodes)

		klog.V(2).InfoS("Adjusting feasible nodes", "owner", ownerKey, "from", nodeCount, "to", len(targetNodes))
		if len(targetNodes) < 2 {
			klog.V(1).InfoS("Less than two feasible nodes for duplicates to land, skipping eviction", "owner", ownerKey)
			continue
		}

		upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
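		// Worked example (illustrative numbers, not from the source): with an
		// owner key seen 15 times across 4 feasible nodes,
		// upperAvg = ceil(15/4) = 4. A node holding 6 recorded duplicates
		// (7 pods including the original referential one) keeps
		// pods[0:upperAvg-1], i.e. 3 duplicates plus the original, and the
		// remaining 3 duplicates are requested for eviction below.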
	loop:
		for nodeName, pods := range podNodes {
			klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
			// The list of duplicated pods does not contain the original referential pod.
			if len(pods)+1 > upperAvg {
				// It's assumed all duplicated pods are in the same priority class.
				// TODO(jchaloup): check if the pod has a different node to lend to
				for _, pod := range pods[upperAvg-1:] {
					r.handle.Evictor().Evict(ctx, pod, evictions.EvictOptions{})
					if r.handle.Evictor().NodeLimitExceeded(nodeMap[nodeName]) {
						continue loop
					}
				}
			}
		}
	}
	return nil
}

func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node {
	// In order to reduce the number of pods processed, identify pods which have
	// equal (tolerations, node selector, node affinity) terms and consider them
	// identical. Pods identical wrt. these terms will produce the same result
	// when checked for feasibility against a node, which improves the
	// efficiency of processing the pods marked for eviction.

	// Collect all distinct pods, i.e. pods which differ in at least their
	// tolerations, node affinity, or node selector terms.
	distinctPods := map[*v1.Pod]struct{}{}
	for _, pods := range podNodes {
		for _, pod := range pods {
			duplicated := false
			for dp := range distinctPods {
				if utils.TolerationsEqual(pod.Spec.Tolerations, dp.Spec.Tolerations) &&
					utils.NodeSelectorsEqual(getNodeAffinityNodeSelector(pod), getNodeAffinityNodeSelector(dp)) &&
					reflect.DeepEqual(pod.Spec.NodeSelector, dp.Spec.NodeSelector) {
					duplicated = true
					break
				}
			}
			if duplicated {
				continue
			}
			distinctPods[pod] = struct{}{}
		}
	}

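	// For illustration (hypothetical scenario): if all duplicates of an owner
	// come from one Deployment, every replica carries the same tolerations,
	// node selector, and node affinity, so only a single representative pod
	// survives into distinctPods and is checked against each node below.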
	// For each distinct pod, get a list of nodes where it can land.
	targetNodesMap := map[string]*v1.Node{}
	for pod := range distinctPods {
		matchingNodes := map[string]*v1.Node{}
		for _, node := range nodes {
			if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
				return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
			}) {
				continue
			}
			if match, err := utils.PodMatchNodeSelector(pod, node); err == nil && !match {
				continue
			}
			matchingNodes[node.Name] = node
		}
		if len(matchingNodes) > 1 {
			for nodeName := range matchingNodes {
				targetNodesMap[nodeName] = matchingNodes[nodeName]
			}
		}
	}

	targetNodes := []*v1.Node{}
	for _, node := range targetNodesMap {
		targetNodes = append(targetNodes, node)
	}

	return targetNodes
}

func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, excludeOwnerKinds []string) bool {
	if len(excludeOwnerKinds) == 0 {
		return false
	}

	exclude := sets.NewString(excludeOwnerKinds...)
	for _, owner := range ownerRefs {
		if exclude.Has(owner.Kind) {
			return true
		}
	}

	return false
}

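// For instance (hypothetical policy value): with
// excludeOwnerKinds = []string{"ReplicaSet"}, any pod owned by a ReplicaSet
// is skipped by Balance and never treated as a duplicate.
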
func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector {
	if pod.Spec.Affinity == nil {
		return nil
	}
	if pod.Spec.Affinity.NodeAffinity == nil {
		return nil
	}
	return pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}