mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 13:29:11 +01:00
pod anti-affinity check among nodes (#1033)
* pod anti-affinity check among nodes * avoid pod equality check with UID field also add node equality check with Name for short-cut * add test case for anti-affinity violation among different node * reduce ListPodsOnANode call * fix old code * apply gofumpt -w -extra move klog/v2 import entry to bottom according to master code
This commit is contained in:
@@ -78,24 +78,36 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Name() string {
|
||||
}
|
||||
|
||||
func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status {
|
||||
podsList, err := d.handle.ClientSet().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return &frameworktypes.Status{
|
||||
Err: fmt.Errorf("error listing all pods: %v", err),
|
||||
}
|
||||
}
|
||||
|
||||
podsInANamespace := groupByNamespace(podsList)
|
||||
|
||||
runningPodFilter := func(pod *v1.Pod) bool {
|
||||
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
|
||||
}
|
||||
podsOnANode := groupByNodeName(podsList, podutil.WrapFilterFuncs(runningPodFilter, d.podFilter))
|
||||
|
||||
nodeMap := createNodeMap(nodes)
|
||||
|
||||
loop:
|
||||
for _, node := range nodes {
|
||||
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
|
||||
pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter)
|
||||
if err != nil {
|
||||
return &frameworktypes.Status{
|
||||
Err: fmt.Errorf("error listing pods: %v", err),
|
||||
}
|
||||
}
|
||||
// sort the evictable Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
|
||||
pods := podsOnANode[node.Name]
|
||||
// sort the evict-able Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers.
|
||||
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
|
||||
totalPods := len(pods)
|
||||
for i := 0; i < totalPods; i++ {
|
||||
if checkPodsWithAntiAffinityExist(pods[i], pods) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
|
||||
if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) {
|
||||
if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{}) {
|
||||
// Since the current pod is evicted all other pods which have anti-affinity with this
|
||||
// pod need not be evicted.
|
||||
// Update pods.
|
||||
// Update allPods.
|
||||
podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace)
|
||||
pods = append(pods[:i], pods[i+1:]...)
|
||||
i--
|
||||
totalPods--
|
||||
@@ -109,8 +121,54 @@ loop:
|
||||
return nil
|
||||
}
|
||||
|
||||
func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod) map[string][]*v1.Pod {
|
||||
podList, ok := podMap[podToRemove.Namespace]
|
||||
if !ok {
|
||||
return podMap
|
||||
}
|
||||
for i := 0; i < len(podList); i++ {
|
||||
podToCheck := podList[i]
|
||||
if podToRemove.Name == podToCheck.Name {
|
||||
podMap[podToRemove.Namespace] = append(podList[:i], podList[i+1:]...)
|
||||
return podMap
|
||||
}
|
||||
}
|
||||
return podMap
|
||||
}
|
||||
|
||||
func groupByNamespace(pods *v1.PodList) map[string][]*v1.Pod {
|
||||
m := make(map[string][]*v1.Pod)
|
||||
for i := 0; i < len(pods.Items); i++ {
|
||||
pod := &(pods.Items[i])
|
||||
m[pod.Namespace] = append(m[pod.Namespace], pod)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func groupByNodeName(pods *v1.PodList, filter podutil.FilterFunc) map[string][]*v1.Pod {
|
||||
m := make(map[string][]*v1.Pod)
|
||||
if filter == nil {
|
||||
filter = func(p *v1.Pod) bool { return true }
|
||||
}
|
||||
for i := 0; i < len(pods.Items); i++ {
|
||||
pod := &(pods.Items[i])
|
||||
if filter(pod) {
|
||||
m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod)
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func createNodeMap(nodes []*v1.Node) map[string]*v1.Node {
|
||||
m := make(map[string]*v1.Node, len(nodes))
|
||||
for _, node := range nodes {
|
||||
m[node.GetName()] = node
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
|
||||
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
|
||||
func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
|
||||
affinity := pod.Spec.Affinity
|
||||
if affinity != nil && affinity.PodAntiAffinity != nil {
|
||||
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
|
||||
@@ -120,9 +178,22 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
|
||||
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
|
||||
return false
|
||||
}
|
||||
for _, existingPod := range pods {
|
||||
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
|
||||
return true
|
||||
for namespace := range namespaces {
|
||||
for _, existingPod := range pods[namespace] {
|
||||
if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
|
||||
node, ok := nodeMap[pod.Spec.NodeName]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
|
||||
klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod))
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -130,6 +201,29 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func hasSameLabelValue(node1, node2 *v1.Node, key string) bool {
|
||||
if node1.Name == node2.Name {
|
||||
return true
|
||||
}
|
||||
node1Labels := node1.Labels
|
||||
if node1Labels == nil {
|
||||
return false
|
||||
}
|
||||
node2Labels := node2.Labels
|
||||
if node2Labels == nil {
|
||||
return false
|
||||
}
|
||||
value1, ok := node1Labels[key]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
value2, ok := node2Labels[key]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return value1 == value2
|
||||
}
|
||||
|
||||
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
|
||||
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
|
||||
if podAntiAffinity != nil {
|
||||
|
||||
@@ -38,19 +38,27 @@ import (
|
||||
)
|
||||
|
||||
func TestPodAntiAffinity(t *testing.T) {
|
||||
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
"region": "main-region",
|
||||
}
|
||||
})
|
||||
node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
"datacenter": "east",
|
||||
}
|
||||
})
|
||||
|
||||
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.Spec = v1.NodeSpec{
|
||||
Unschedulable: true,
|
||||
}
|
||||
})
|
||||
node4 := test.BuildTestNode("n4", 2, 2, 1, nil)
|
||||
node5 := test.BuildTestNode("n5", 200, 3000, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
"region": "main-region",
|
||||
}
|
||||
})
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
|
||||
@@ -62,6 +70,7 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
|
||||
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
|
||||
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
|
||||
p11 := test.BuildTestPod("p11", 100, 0, node5.Name, nil)
|
||||
p9.DeletionTimestamp = &metav1.Time{}
|
||||
p10.DeletionTimestamp = &metav1.Time{}
|
||||
|
||||
@@ -73,6 +82,7 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
p5.Labels = map[string]string{"foo": "bar"}
|
||||
p6.Labels = map[string]string{"foo": "bar"}
|
||||
p7.Labels = map[string]string{"foo1": "bar1"}
|
||||
p11.Labels = map[string]string{"foo": "bar"}
|
||||
nonEvictablePod.Labels = map[string]string{"foo": "bar"}
|
||||
test.SetNormalOwnerRef(p1)
|
||||
test.SetNormalOwnerRef(p2)
|
||||
@@ -83,6 +93,7 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
test.SetNormalOwnerRef(p7)
|
||||
test.SetNormalOwnerRef(p9)
|
||||
test.SetNormalOwnerRef(p10)
|
||||
test.SetNormalOwnerRef(p11)
|
||||
|
||||
// set pod anti affinity
|
||||
setPodAntiAffinity(p1, "foo", "bar")
|
||||
@@ -186,6 +197,13 @@ func TestPodAntiAffinity(t *testing.T) {
|
||||
expectedEvictedPodCount: 0,
|
||||
nodeFit: true,
|
||||
},
|
||||
{
|
||||
description: "Evict pod violating anti-affinity among different node (all pods have anti-affinity)",
|
||||
pods: []*v1.Pod{p1, p11},
|
||||
nodes: []*v1.Node{node1, node5},
|
||||
expectedEvictedPodCount: 1,
|
||||
nodeFit: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
Reference in New Issue
Block a user