1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Check whether pod matches the inter-pod anti-affinity of another Pod in a given Node in NodeFit() (#1356)

* Check if Pod matches inter-pod anti-affinity of other pod on node as part of NodeFit()

* Add unit tests for checking inter-pod anti-affinity match in NodeFit()
* Export setPodAntiAffinity() helper func to test utils

* Add docs for inter-pod anti-affinity in README

* Refactor logic for inter-pod anti-affinity to use in multiple pkgs
* Move logic for finding match between pods with antiaffinity out of framework to reuse in other pkgs
* Move interpod antiaffinity funcs to pkg/utils/predicates.go

* Add unit tests for inter-pod anti-affinity check
* Test logic in GroupByNodeName
* Test NodeFit() case where pods matches inter-pod anti-affinity
* Test for inter-pod anti-affinity pods  match terms, have label selector

* NodeFit inter-pod anti-affinity check returns early if affinity spec not set
This commit is contained in:
Niki Manoledaki
2024-03-13 03:50:03 +01:00
committed by GitHub
parent dc2cf723bc
commit 749e81c51c
12 changed files with 328 additions and 132 deletions

View File

@@ -130,6 +130,13 @@ func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v
errors = append(errors, fmt.Errorf("node is not schedulable"))
}
// Check if pod matches inter-pod anti-affinity rule of pod on node
if match, err := podMatchesInterPodAntiAffinity(nodeIndexer, pod, node); err != nil {
errors = append(errors, err)
} else if match {
errors = append(errors, fmt.Errorf("pod matches inter-pod anti-affinity rule of other pod on node"))
}
return errors
}
@@ -323,3 +330,22 @@ func PodMatchNodeSelector(pod *v1.Pod, node *v1.Node) bool {
}
return matches
}
// podMatchesInterPodAntiAffinity checks if the pod matches the anti-affinity rule
// of another pod that is already on the given node.
// If a match is found, it returns true.
func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, error) {
if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAntiAffinity == nil {
return false, nil
}
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
if err != nil {
return false, fmt.Errorf("error listing all pods: %v", err)
}
podsInANamespace := podutil.GroupByNamespace(podsOnNode)
nodeMap := utils.CreateNodeMap([]*v1.Node{node})
return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
}

View File

@@ -754,7 +754,11 @@ func TestPodFitsAnyOtherNode(t *testing.T) {
}
func TestNodeFit(t *testing.T) {
node := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)
node := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"region": "main-region",
}
})
tests := []struct {
description string
pod *v1.Pod
@@ -781,6 +785,22 @@ func TestNodeFit(t *testing.T) {
},
err: errors.New("insufficient pods"),
},
{
description: "matches inter-pod anti-affinity rule of pod on node",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
},
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
},
{
description: "pod fits on node",
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
node: node,
podsOnNode: []*v1.Pod{},
err: nil,
},
}
for _, tc := range tests {
@@ -804,7 +824,8 @@ func TestNodeFit(t *testing.T) {
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
if errs := NodeFit(getPodsAssignedToNode, tc.pod, tc.node); (len(errs) == 0 && tc.err != nil) || errs[0].Error() != tc.err.Error() {
errs := NodeFit(getPodsAssignedToNode, tc.pod, tc.node)
if (len(errs) == 0 && tc.err != nil) || (len(errs) > 0 && errs[0].Error() != tc.err.Error()) {
t.Errorf("Test %#v failed, got %v, expect %v", tc.description, errs, tc.err)
}
})

View File

@@ -257,3 +257,12 @@ func SortPodsBasedOnAge(pods []*v1.Pod) {
return pods[i].CreationTimestamp.Before(&pods[j].CreationTimestamp)
})
}
func GroupByNodeName(pods []*v1.Pod) map[string][]*v1.Pod {
m := make(map[string][]*v1.Pod)
for i := 0; i < len(pods); i++ {
pod := pods[i]
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
}
return m
}

View File

@@ -176,3 +176,67 @@ func TestSortPodsBasedOnAge(t *testing.T) {
}
}
}
func TestGroupByNodeName(t *testing.T) {
tests := []struct {
name string
pods []*v1.Pod
expMap map[string][]*v1.Pod
}{
{
name: "list of pods is empty",
pods: []*v1.Pod{},
expMap: map[string][]*v1.Pod{},
},
{
name: "pods are on same node",
pods: []*v1.Pod{
{Spec: v1.PodSpec{
NodeName: "node1",
}},
{Spec: v1.PodSpec{
NodeName: "node1",
}},
},
expMap: map[string][]*v1.Pod{"node1": {
{Spec: v1.PodSpec{
NodeName: "node1",
}},
{Spec: v1.PodSpec{
NodeName: "node1",
}},
}},
},
{
name: "pods are on different nodes",
pods: []*v1.Pod{
{Spec: v1.PodSpec{
NodeName: "node1",
}},
{Spec: v1.PodSpec{
NodeName: "node2",
}},
},
expMap: map[string][]*v1.Pod{
"node1": {
{Spec: v1.PodSpec{
NodeName: "node1",
}},
},
"node2": {
{Spec: v1.PodSpec{
NodeName: "node2",
}},
},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
resultMap := GroupByNodeName(test.pods)
if !reflect.DeepEqual(resultMap, test.expMap) {
t.Errorf("Expected %v node map, got %v", test.expMap, resultMap)
}
})
}
}

View File

@@ -28,7 +28,6 @@ import (
"sigs.k8s.io/descheduler/pkg/utils"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
)
@@ -86,8 +85,8 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context
}
podsInANamespace := podutil.GroupByNamespace(pods)
podsOnANode := groupByNodeName(pods)
nodeMap := createNodeMap(nodes)
podsOnANode := podutil.GroupByNodeName(pods)
nodeMap := utils.CreateNodeMap(nodes)
loop:
for _, node := range nodes {
@@ -97,15 +96,17 @@ loop:
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
totalPods := len(pods)
for i := 0; i < totalPods; i++ {
if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update allPods.
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
if utils.CheckPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) {
if d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{StrategyName: PluginName}) {
// Since the current pod is evicted all other pods which have anti-affinity with this
// pod need not be evicted.
// Update allPods.
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
pods = append(pods[:i], pods[i+1:]...)
i--
totalPods--
}
}
}
if d.handle.Evictor().NodeLimitExceeded(node) {
@@ -130,87 +131,3 @@ func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod)
}
return podMap
}
func groupByNodeName(pods []*v1.Pod) map[string][]*v1.Pod {
m := make(map[string][]*v1.Pod)
for i := 0; i < len(pods); i++ {
pod := pods[i]
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
}
return m
}
func createNodeMap(nodes []*v1.Node) map[string]*v1.Node {
m := make(map[string]*v1.Node, len(nodes))
for _, node := range nodes {
m[node.GetName()] = node
}
return m
}
// checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod))
return true
}
}
}
}
}
}
return false
}
func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
if node1.Name == node2.Name {
return true
}
node1Labels := node1.Labels
if node1Labels == nil {
return false
}
node2Labels := node2.Labels
if node2Labels == nil {
return false
}
value1, ok := node1Labels[key]
if !ok {
return false
}
value2, ok := node2Labels[key]
if !ok {
return false
}
return value1 == value2
}
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
}
return terms
}

View File

@@ -96,14 +96,14 @@ func TestPodAntiAffinity(t *testing.T) {
test.SetNormalOwnerRef(p11)
// set pod anti affinity
setPodAntiAffinity(p1, "foo", "bar")
setPodAntiAffinity(p3, "foo", "bar")
setPodAntiAffinity(p4, "foo", "bar")
setPodAntiAffinity(p5, "foo1", "bar1")
setPodAntiAffinity(p6, "foo1", "bar1")
setPodAntiAffinity(p7, "foo", "bar")
setPodAntiAffinity(p9, "foo", "bar")
setPodAntiAffinity(p10, "foo", "bar")
test.SetPodAntiAffinity(p1, "foo", "bar")
test.SetPodAntiAffinity(p3, "foo", "bar")
test.SetPodAntiAffinity(p4, "foo", "bar")
test.SetPodAntiAffinity(p5, "foo1", "bar1")
test.SetPodAntiAffinity(p6, "foo1", "bar1")
test.SetPodAntiAffinity(p7, "foo", "bar")
test.SetPodAntiAffinity(p9, "foo", "bar")
test.SetPodAntiAffinity(p10, "foo", "bar")
// set pod priority
test.SetPodPriority(p5, 100)
@@ -284,24 +284,3 @@ func TestPodAntiAffinity(t *testing.T) {
})
}
}
func setPodAntiAffinity(inputPod *v1.Pod, labelKey, labelValue string) {
inputPod.Spec.Affinity = &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: labelKey,
Operator: metav1.LabelSelectorOpIn,
Values: []string{labelValue},
},
},
},
TopologyKey: "region",
},
},
},
}
}

View File

@@ -22,6 +22,7 @@ import (
"sort"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
@@ -299,3 +300,84 @@ func GetNodeWeightGivenPodPreferredAffinity(pod *v1.Pod, node *v1.Node) (int32,
}
return sumWeights, nil
}
func CreateNodeMap(nodes []*v1.Node) map[string]*v1.Node {
m := make(map[string]*v1.Node, len(nodes))
for _, node := range nodes {
m[node.GetName()] = node
}
return m
}
// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := getNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && podMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods matching PodAntiAffinity", "pod with anti-affinity", klog.KObj(pod))
return true
}
}
}
}
}
}
return false
}
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
}
return terms
}
// hasSameLabelValue checks if the pods are in the same topology zone.
func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
if node1.Name == node2.Name {
return true
}
// no match if node has empty labels
node1Labels := node1.Labels
if node1Labels == nil {
return false
}
node2Labels := node2.Labels
if node2Labels == nil {
return false
}
// no match if node has no topology zone label with given key
value1, ok := node1Labels[key]
if !ok {
return false
}
value2, ok := node2Labels[key]
if !ok {
return false
}
return value1 == value2
}

View File

@@ -6,6 +6,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/descheduler/test"
)
func TestUniqueSortTolerations(t *testing.T) {
@@ -1041,3 +1042,71 @@ func TestPodNodeAffinityWeight(t *testing.T) {
})
}
}
func TestCheckPodsWithAntiAffinityExist(t *testing.T) {
tests := []struct {
name string
pod *v1.Pod
podsInNamespace map[string][]*v1.Pod
nodeMap map[string]*v1.Node
affinity *v1.PodAntiAffinity
expMatch bool
}{
{
name: "found pod matching pod anti-affinity",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node", nil), "foo", "bar"),
podsInNamespace: map[string][]*v1.Pod{
"default": {
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, "node", nil), "foo", "bar"),
},
},
nodeMap: map[string]*v1.Node{
"node": test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"region": "main-region",
}
}),
},
expMatch: true,
},
{
name: "no match with invalid label selector",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"wrong": "selector",
},
},
},
},
},
},
},
},
podsInNamespace: map[string][]*v1.Pod{},
nodeMap: map[string]*v1.Node{},
expMatch: false,
},
{
name: "no match if pod does not match terms namespace",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node", func(pod *v1.Pod) {
pod.Namespace = "other"
}), "foo", "bar"),
podsInNamespace: map[string][]*v1.Pod{},
nodeMap: map[string]*v1.Node{},
expMatch: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if match := CheckPodsWithAntiAffinityExist(test.pod, test.podsInNamespace, test.nodeMap); match != test.expMatch {
t.Errorf("exp %v got %v", test.expMatch, match)
}
})
}
}

View File

@@ -14,10 +14,10 @@ import (
const SystemCriticalPriority = 2 * int32(1000000000)
// GetNamespacesFromPodAffinityTerm returns a set of names
// getNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
names := sets.New[string]()
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
@@ -27,9 +27,9 @@ func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffini
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// podMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
func podMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}