From 469bde0a01f58e4d5a3cfd636bdfb995baeb573a Mon Sep 17 00:00:00 2001
From: Amir Alavi
Date: Sun, 5 Jun 2022 14:00:28 -0400
Subject: [PATCH 1/3] TopologySpreadConstraint: only evaluate nodes below ideal avg when balancing domains

---
 .../strategies/topologyspreadconstraint.go    |  29 ++++-
 .../topologyspreadconstraint_test.go          | 121 ++++++++++++++++++
 2 files changed, 146 insertions(+), 4 deletions(-)

diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go
index 531bc4560..a5254dcdc 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint.go
@@ -159,7 +159,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
 			continue
 		}
 
-		// 5. If the pod's node matches this constraint'selector topologyKey, create a topoPair and add the pod
+		// 5. If the pod's node matches this constraint's topologyKey, create a topoPair and add the pod
 		node, ok := nodeMap[namespacePods.Items[i].Spec.NodeName]
 		if !ok {
 			// If ok is false, node is nil in which case node.Labels will panic. In which case a pod is yet to be scheduled. So it's safe to just continue here.
@@ -179,7 +179,7 @@
 			klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
 			continue
 		}
-		balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
+		balanceDomains(getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
 	}
 }
 
@@ -244,7 +244,6 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
 // [5, 5, 5, 5, 5, 5]
 // (assuming even distribution by the scheduler of the evicted pods)
 func balanceDomains(
-	client clientset.Interface,
 	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
 	podsForEviction map[*v1.Pod]struct{},
 	constraint v1.TopologySpreadConstraint,
@@ -256,6 +255,8 @@
 	idealAvg := sumPods / float64(len(constraintTopologies))
 	sortedDomains := sortDomains(constraintTopologies, isEvictable)
 
+	nodesBelowIdealAvg := filterNodesBelowIdealAvg(nodes, sortedDomains, constraint.TopologyKey, idealAvg)
+
 	// i is the index for belowOrEqualAvg
 	// j is the index for aboveAvg
 	i := 0
@@ -306,7 +307,8 @@
 			// In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes.
 			// This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm.
 			// So, a better selection heuristic could improve performance.
-			if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
+
+			if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodesBelowIdealAvg) {
 				klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
 				continue
 			}
@@ -318,6 +320,25 @@
 		}
 	}
 }
 
+// filterNodesBelowIdealAvg returns the nodes whose topology domain holds fewer pods matching the constraint than the idealAvg count.
+// The desired behavior is to not consider nodes in a topology domain that is already packed.
+func filterNodesBelowIdealAvg(nodes []*v1.Node, sortedDomains []topology, topologyKey string, idealAvg float64) []*v1.Node {
+	topologyNodesMap := make(map[string][]*v1.Node, len(sortedDomains))
+	for _, node := range nodes {
+		if topologyDomain, ok := node.Labels[topologyKey]; ok {
+			topologyNodesMap[topologyDomain] = append(topologyNodesMap[topologyDomain], node)
+		}
+	}
+
+	var nodesBelowIdealAvg []*v1.Node
+	for _, domain := range sortedDomains {
+		if float64(len(domain.pods)) < idealAvg {
+			nodesBelowIdealAvg = append(nodesBelowIdealAvg, topologyNodesMap[domain.pair.value]...)
+		}
+	}
+	return nodesBelowIdealAvg
+}
+
 // sortDomains sorts and splits the list of topology domains based on their size
 // it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
 // 1. non-evictable pods
diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go
index e27fb7645..82db1b916 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go
@@ -936,6 +936,127 @@ func TestTopologySpreadConstraint(t *testing.T) {
 			},
 			namespaces: []string{"ns1"},
 		},
+		{
+			name: "3 domains, sizes [2,3,4], maxSkew=1, NodeFit is enabled, and not enough cpu on zoneA; nothing should be moved",
+			nodes: []*v1.Node{
+				test.BuildTestNode("n1", 250, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+				test.BuildTestNode("n2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+				test.BuildTestNode("n3", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+			},
+			pods: createTestPods([]testPodList{
+				{
+					count: 2,
+					node: "n1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 3,
+					node: "n2",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 4,
+					node: "n3",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+			}),
+			expectedEvictedCount: 0,
+			strategy: api.DeschedulerStrategy{
+				Params: &api.StrategyParameters{NodeFit: true},
+			},
+			namespaces: []string{"ns1"},
+		},
+		{
+			name: "3 domains, sizes [[1,0], [1,1], [2,1]], maxSkew=1, NodeFit is enabled, and not enough cpu on zoneA; nothing should be moved",
+			nodes: []*v1.Node{
+				test.BuildTestNode("A1", 150, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+				test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+				test.BuildTestNode("C1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+				test.BuildTestNode("A2", 50, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+				test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+				test.BuildTestNode("C2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+			},
+			pods: createTestPods([]testPodList{
+				{
+					count: 1,
+					node: "A1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 1,
+					node: "B1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 1,
+					node: "B2",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 2,
+					node: "C1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 1,
+					node: "C2",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+			}),
+			expectedEvictedCount: 0,
+			strategy: api.DeschedulerStrategy{
+				Params: &api.StrategyParameters{NodeFit: true},
+			},
+			namespaces: []string{"ns1"},
+		},
+		{
+			name: "2 domains, sizes [[1,4], [2,1]], maxSkew=1, NodeFit is enabled; should move 1",
+			nodes: []*v1.Node{
+				test.BuildTestNode("A1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+				test.BuildTestNode("A2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+				test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+				test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+			},
+			pods: createTestPods([]testPodList{
+				{
+					count: 1,
+					node: "A1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 4,
+					node: "A2",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 2,
+					node: "B1",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+				{
+					count: 1,
+					node: "B2",
+					labels: map[string]string{"foo": "bar"},
+					constraints: getDefaultTopologyConstraints(1),
+				},
+			}),
+			expectedEvictedCount: 1,
+			strategy: api.DeschedulerStrategy{
+				Params: &api.StrategyParameters{NodeFit: true},
+			},
+			namespaces: []string{"ns1"},
+		},
 	}
 
 	for _, tc := range testCases {

From 7a5e67d4624ab0276854a60c5256b33b4f447ff9 Mon Sep 17 00:00:00 2001
From: Amir Alavi
Date: Mon, 20 Jun 2022 19:10:22 -0400
Subject: [PATCH 2/3] topologyspreadconstraint_test: ensure specific pods were evicted

---
 .../topologyspreadconstraint_test.go          | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go
index 82db1b916..551eb0dc3 100644
--- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go
+++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go
@@ -6,10 +6,13 @@ import (
 	"testing"
 
 	v1 "k8s.io/api/core/v1"
+	policy "k8s.io/api/policy/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
+	core "k8s.io/client-go/testing"
 
 	"sigs.k8s.io/descheduler/pkg/api"
 	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -22,6 +25,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
 		name                 string
 		pods                 []*v1.Pod
 		expectedEvictedCount uint
+		expectedEvictedPods  []string // if specified, will assert specific pods were evicted
 		nodes                []*v1.Node
 		strategy             api.DeschedulerStrategy
 		namespaces           []string
@@ -642,6 +646,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
 				},
 			}),
 			expectedEvictedCount: 5,
+			expectedEvictedPods:  []string{"pod-5", "pod-6", "pod-7", "pod-8"},
 			strategy:             api.DeschedulerStrategy{},
 			namespaces:           []string{"ns1"},
 		},
@@ -714,11 +719,12 @@ func TestTopologySpreadConstraint(t *testing.T) {
 				},
 			}),
 			expectedEvictedCount: 1,
+			expectedEvictedPods:  []string{"pod-0"},
 			strategy:             api.DeschedulerStrategy{},
 			namespaces:           []string{"ns1"},
 		},
 		{
-			name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node",
+			name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node",
 			nodes: []*v1.Node{
 				test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) {
n.Labels["zone"] = "zoneA" }), test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) { @@ -817,6 +823,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, + expectedEvictedPods: []string{"pod-0"}, strategy: api.DeschedulerStrategy{}, namespaces: []string{"ns1"}, }, @@ -867,6 +874,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, + expectedEvictedPods: []string{"pod-1"}, strategy: api.DeschedulerStrategy{ Params: &api.StrategyParameters{ LabelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn), @@ -894,6 +902,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, + expectedEvictedPods: []string{"pod-1"}, strategy: api.DeschedulerStrategy{ Params: &api.StrategyParameters{ LabelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpNotIn), @@ -1052,6 +1061,7 @@ func TestTopologySpreadConstraint(t *testing.T) { }, }), expectedEvictedCount: 1, + expectedEvictedPods: []string{"pod-4"}, strategy: api.DeschedulerStrategy{ Params: &api.StrategyParameters{NodeFit: true}, }, @@ -1085,6 +1095,20 @@ func TestTopologySpreadConstraint(t *testing.T) { sharedInformerFactory.Start(ctx.Done()) sharedInformerFactory.WaitForCacheSync(ctx.Done()) + var evictedPods []string + fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + if action.GetSubresource() == "eviction" { + createAct, matched := action.(core.CreateActionImpl) + if !matched { + return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl") + } + if eviction, matched := createAct.Object.(*policy.Eviction); matched { + evictedPods = append(evictedPods, eviction.GetName()) + } + } + return false, nil, nil // fallback to the default reactor + }) + podEvictor := evictions.NewPodEvictor( fakeClient, "v1", @@ -1115,6 +1139,18 @@ func TestTopologySpreadConstraint(t *testing.T) { if podsEvicted != tc.expectedEvictedCount { t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", tc.name, tc.expectedEvictedCount, podsEvicted) } + + if tc.expectedEvictedPods != nil { + diff := sets.NewString(tc.expectedEvictedPods...).Difference(sets.NewString(evictedPods...)) + if diff.Len() > 0 { + t.Errorf( + "Expected pods %v to be evicted but %v were not evicted. 
Actual pods evicted: %v", + tc.expectedEvictedPods, + diff.List(), + evictedPods, + ) + } + } }) } } From 934fffb669e9daba2eedd0650347d480a572386c Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Mon, 20 Jun 2022 20:53:30 -0400 Subject: [PATCH 3/3] RemovePodsViolatingTopologySpreadConstraint: test case to cover tainted nodes and eviction loop --- .../topologyspreadconstraint_test.go | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index 551eb0dc3..0852205b2 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -1067,6 +1067,112 @@ func TestTopologySpreadConstraint(t *testing.T) { }, namespaces: []string{"ns1"}, }, + { + // https://github.com/kubernetes-sigs/descheduler/issues/839 + name: "2 domains, sizes [3, 1], maxSkew=1, Tainted nodes; nothing should be moved", + nodes: []*v1.Node{ + test.BuildTestNode("controlplane1", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneA" + n.Labels["kubernetes.io/hostname"] = "cp-1" + n.Spec.Taints = []v1.Taint{ + { + Effect: v1.TaintEffectNoSchedule, + Key: "node-role.kubernetes.io/controlplane", + Value: "true", + }, + { + Effect: v1.TaintEffectNoSchedule, + Key: "node-role.kubernetes.io/etcd", + Value: "true", + }, + } + }), + test.BuildTestNode("infraservices1", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneA" + n.Labels["infra_service"] = "true" + n.Labels["kubernetes.io/hostname"] = "infra-1" + n.Spec.Taints = []v1.Taint{ + { + Effect: v1.TaintEffectNoSchedule, + Key: "infra_service", + }, + } + }), + test.BuildTestNode("app1", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneA" + n.Labels["app_service"] = "true" + n.Labels["kubernetes.io/hostname"] = "app-1" + }), + test.BuildTestNode("app2", 2000, 3000, 10, func(n *v1.Node) { + n.Labels["zone"] = "zoneB" + n.Labels["app_service"] = "true" + n.Labels["kubernetes.io/hostname"] = "app-2" + }), + }, + pods: createTestPods([]testPodList{ + { + count: 2, + node: "app1", + labels: map[string]string{"app.kubernetes.io/name": "service"}, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "kubernetes.io/hostname", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "service"}}, + }, + }, + nodeAffinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "app_service", + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + }, + }, + { + count: 2, + node: "app2", + labels: map[string]string{"app.kubernetes.io/name": "service"}, + constraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "kubernetes.io/hostname", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "service"}}, + }, + }, + nodeAffinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "app_service", + Operator: v1.NodeSelectorOpExists, + }, + }, + }, + }, + }, + }, + }, + }, + }), + expectedEvictedCount: 0, + 
+			strategy:             api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
+			namespaces:           []string{"ns1"},
+		},
 	}
 
 	for _, tc := range testCases {
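
For readers who want the core idea of PATCH 1/3 without applying the series: the change makes balanceDomains hand the NodeFit check only the nodes whose topology domain sits below the ideal average, so a pod is never counted as evictable when the only place it fits is a domain that is already full. Below is a minimal, self-contained sketch of that filtering heuristic. The node and domain types are simplified stand-ins invented for illustration (the real code operates on *v1.Node and the strategy's internal topology/topologyPair types), so treat this as a sketch of the idea rather than the project's actual API.

package main

import "fmt"

// Simplified stand-ins for the descheduler's internal bookkeeping
// (hypothetical types, for illustration only).
type node struct {
	name   string
	labels map[string]string
}

type domain struct {
	value string   // topology domain value, e.g. "zoneA"
	pods  []string // pods counted against the constraint in this domain
}

// filterNodesBelowIdealAvg mirrors the patch's heuristic: keep only the nodes
// whose topology domain currently holds fewer matching pods than the ideal
// average, so already-packed domains are never offered as eviction targets.
func filterNodesBelowIdealAvg(nodes []*node, domains []domain, topologyKey string, idealAvg float64) []*node {
	// Group candidate nodes by their topology domain value.
	byDomain := make(map[string][]*node, len(domains))
	for _, n := range nodes {
		if v, ok := n.labels[topologyKey]; ok {
			byDomain[v] = append(byDomain[v], n)
		}
	}
	// Collect the nodes belonging to domains that are below the ideal average.
	var below []*node
	for _, d := range domains {
		if float64(len(d.pods)) < idealAvg {
			below = append(below, byDomain[d.value]...)
		}
	}
	return below
}

func main() {
	nodes := []*node{
		{name: "n1", labels: map[string]string{"zone": "zoneA"}},
		{name: "n2", labels: map[string]string{"zone": "zoneB"}},
		{name: "n3", labels: map[string]string{"zone": "zoneC"}},
	}
	domains := []domain{
		{value: "zoneA", pods: []string{"p1", "p2"}},
		{value: "zoneB", pods: []string{"p3", "p4", "p5"}},
		{value: "zoneC", pods: []string{"p6", "p7", "p8", "p9"}},
	}
	idealAvg := 9.0 / 3.0 // 9 matching pods spread over 3 domains
	for _, n := range filterNodesBelowIdealAvg(nodes, domains, "zone", idealAvg) {
		fmt.Println(n.name) // prints only n1: zoneA (2 pods) is the sole domain below the average of 3
	}
}

Feeding only this filtered list to the fit check (PodFitsAnyOtherNode in the real code) is what breaks the evict-reschedule loop reported in issue #839: when every node that could hold the pod belongs to a domain at or above the ideal average, the pod is simply skipped instead of being evicted and rescheduled back onto an equivalent node.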