From c80556fc91e46c3250911e6be02d234562f6493c Mon Sep 17 00:00:00 2001 From: Hao Fan Date: Sun, 2 Jun 2024 18:58:57 +0800 Subject: [PATCH] fix the issue that the pod anti-filtering rules are not taking effect --- pkg/descheduler/node/node.go | 27 ++++++++- pkg/descheduler/node/node_test.go | 38 +++++++++++-- pkg/utils/predicates.go | 95 +++++++++++++++++++++---------- pkg/utils/priority.go | 29 ---------- 4 files changed, 123 insertions(+), 66 deletions(-) diff --git a/pkg/descheduler/node/node.go b/pkg/descheduler/node/node.go index eed060463..d28328065 100644 --- a/pkg/descheduler/node/node.go +++ b/pkg/descheduler/node/node.go @@ -343,9 +343,30 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun if err != nil { return false, fmt.Errorf("error listing all pods: %v", err) } + assignedPodsInNamespace := podutil.GroupByNamespace(podsOnNode) - podsInANamespace := podutil.GroupByNamespace(podsOnNode) - nodeMap := utils.CreateNodeMap([]*v1.Node{node}) + for _, term := range utils.GetPodAntiAffinityTerms(pod.Spec.Affinity.PodAntiAffinity) { + namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term) + selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector) + if err != nil { + klog.ErrorS(err, "Unable to convert LabelSelector into Selector") + return false, err + } - return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil + for namespace := range namespaces { + for _, assignedPod := range assignedPodsInNamespace[namespace] { + if assignedPod.Name == pod.Name || !utils.PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) { + klog.V(4).InfoS("Pod doesn't match inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod)) + continue + } + + if _, ok := node.Labels[term.TopologyKey]; ok { + klog.V(1).InfoS("Pod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod)) + return true, nil + } + } + } + } + + return false, nil } diff --git a/pkg/descheduler/node/node_test.go b/pkg/descheduler/node/node_test.go index dafc0e920..383e18bc5 100644 --- a/pkg/descheduler/node/node_test.go +++ b/pkg/descheduler/node/node_test.go @@ -759,6 +759,9 @@ func TestNodeFit(t *testing.T) { "region": "main-region", } }) + + nodeNolabel := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil) + tests := []struct { description string pod *v1.Pod @@ -767,7 +770,7 @@ func TestNodeFit(t *testing.T) { err error }{ { - description: "insufficient cpu", + description: "Insufficient cpu", pod: test.BuildTestPod("p1", 10000, 2*1000*1000*1000, "", nil), node: node, podsOnNode: []*v1.Pod{ @@ -776,7 +779,7 @@ func TestNodeFit(t *testing.T) { err: errors.New("insufficient cpu"), }, { - description: "insufficient pod num", + description: "Insufficient pod num", pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, "", nil), node: node, podsOnNode: []*v1.Pod{ @@ -786,7 +789,7 @@ func TestNodeFit(t *testing.T) { err: errors.New("insufficient pods"), }, { - description: "matches inter-pod anti-affinity rule of pod on node", + description: "Pod matches inter-pod anti-affinity rule of other pod on node", pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"), node: node, podsOnNode: []*v1.Pod{ @@ -795,11 +798,36 @@ func TestNodeFit(t *testing.T) { err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"), }, { - description: "pod fits on node", + description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because pod and other pod is not same namespace", + pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"), + node: node, + podsOnNode: []*v1.Pod{ + test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, func(pod *v1.Pod) { + pod.Namespace = "test" + }), "foo", "bar"), + }, + }, + { + description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because other pod not match labels of pod", + pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"), + node: node, + podsOnNode: []*v1.Pod{ + test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo1", "bar1"), + }, + }, + { + description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because node have no topologyKey", + pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node1", nil), "foo", "bar"), + node: nodeNolabel, + podsOnNode: []*v1.Pod{ + test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"), + }, + }, + { + description: "Pod fits on node", pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}), node: node, podsOnNode: []*v1.Pod{}, - err: nil, }, } diff --git a/pkg/utils/predicates.go b/pkg/utils/predicates.go index 48f25a137..7153f1792 100644 --- a/pkg/utils/predicates.go +++ b/pkg/utils/predicates.go @@ -24,10 +24,37 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/component-helpers/scheduling/corev1" "k8s.io/klog/v2" ) +// GetNamespacesFromPodAffinityTerm returns a set of names +// according to the namespaces indicated in podAffinityTerm. +// If namespaces is empty it considers the given pod's namespace. +func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] { + names := sets.New[string]() + if len(podAffinityTerm.Namespaces) == 0 { + names.Insert(pod.Namespace) + } else { + names.Insert(podAffinityTerm.Namespaces...) + } + return names +} + +// PodMatchesTermsNamespaceAndSelector returns true if the given +// matches the namespace and selector defined by `s . +func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool { + if !namespaces.Has(pod.Namespace) { + return false + } + + if !selector.Matches(labels.Set(pod.Labels)) { + return false + } + return true +} + // The following code has been copied from predicates package to avoid the // huge vendoring issues, mostly copied from // k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/ @@ -309,42 +336,52 @@ func CreateNodeMap(nodes []*v1.Node) map[string]*v1.Node { return m } -// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate. -func CheckPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool { - affinity := pod.Spec.Affinity - if affinity != nil && affinity.PodAntiAffinity != nil { - for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) { - namespaces := getNamespacesFromPodAffinityTerm(pod, &term) - selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector) - if err != nil { - klog.ErrorS(err, "Unable to convert LabelSelector into Selector") - return false - } - for namespace := range namespaces { - for _, existingPod := range pods[namespace] { - if existingPod.Name != pod.Name && podMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) { - node, ok := nodeMap[pod.Spec.NodeName] - if !ok { - continue - } - nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName] - if !ok { - continue - } - if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) { - klog.V(1).InfoS("Found Pods matching PodAntiAffinity", "pod with anti-affinity", klog.KObj(pod)) - return true - } - } +// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current candidate pod cannot tolerate. +func CheckPodsWithAntiAffinityExist(candidatePod *v1.Pod, assignedPods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool { + nodeHavingCandidatePod, ok := nodeMap[candidatePod.Spec.NodeName] + if !ok { + klog.Warningf("CandidatePod %s does not exist in nodeMap", klog.KObj(candidatePod)) + return false + } + + affinity := candidatePod.Spec.Affinity + if affinity == nil || affinity.PodAntiAffinity == nil { + return false + } + + for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) { + namespaces := GetNamespacesFromPodAffinityTerm(candidatePod, &term) + selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector) + if err != nil { + klog.ErrorS(err, "Unable to convert LabelSelector into Selector") + return false + } + + for namespace := range namespaces { + for _, assignedPod := range assignedPods[namespace] { + if assignedPod.Name == candidatePod.Name || !PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) { + klog.V(4).InfoS("CandidatePod doesn't matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod)) + continue + } + + nodeHavingAssignedPod, ok := nodeMap[assignedPod.Spec.NodeName] + if !ok { + continue + } + + if hasSameLabelValue(nodeHavingCandidatePod, nodeHavingAssignedPod, term.TopologyKey) { + klog.V(1).InfoS("CandidatePod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod)) + return true } } } } + return false } -// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod. -func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) { +// GetPodAntiAffinityTerms gets the antiaffinity terms for the given pod. +func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) { if podAntiAffinity != nil { if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 { terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution diff --git a/pkg/utils/priority.go b/pkg/utils/priority.go index 2ba401a4e..9a389dbe9 100644 --- a/pkg/utils/priority.go +++ b/pkg/utils/priority.go @@ -4,42 +4,13 @@ import ( "context" "fmt" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/sets" clientset "k8s.io/client-go/kubernetes" "sigs.k8s.io/descheduler/pkg/api" ) const SystemCriticalPriority = 2 * int32(1000000000) -// getNamespacesFromPodAffinityTerm returns a set of names -// according to the namespaces indicated in podAffinityTerm. -// If namespaces is empty it considers the given pod's namespace. -func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] { - names := sets.New[string]() - if len(podAffinityTerm.Namespaces) == 0 { - names.Insert(pod.Namespace) - } else { - names.Insert(podAffinityTerm.Namespaces...) - } - return names -} - -// podMatchesTermsNamespaceAndSelector returns true if the given -// matches the namespace and selector defined by `s . -func podMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool { - if !namespaces.Has(pod.Namespace) { - return false - } - - if !selector.Matches(labels.Set(pod.Labels)) { - return false - } - return true -} - // GetPriorityFromPriorityClass gets priority from the given priority class. // If no priority class is provided, it will return SystemCriticalPriority by default. func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) {