From 469bde0a01f58e4d5a3cfd636bdfb995baeb573a Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Sun, 5 Jun 2022 14:00:28 -0400 Subject: [PATCH] TopologySpreadConstraint: only evaluate nodes below ideal avg when balancing domains --- .../strategies/topologyspreadconstraint.go | 29 ++++- .../topologyspreadconstraint_test.go | 121 ++++++++++++++++++ 2 files changed, 146 insertions(+), 4 deletions(-) diff --git a/pkg/descheduler/strategies/topologyspreadconstraint.go b/pkg/descheduler/strategies/topologyspreadconstraint.go index 531bc4560..a5254dcdc 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint.go @@ -159,7 +159,7 @@ func RemovePodsViolatingTopologySpreadConstraint( continue } - // 5. If the pod's node matches this constraint'selector topologyKey, create a topoPair and add the pod + // 5. If the pod's node matches this constraint's topologyKey, create a topoPair and add the pod node, ok := nodeMap[namespacePods.Items[i].Spec.NodeName] if !ok { // If ok is false, node is nil in which case node.Labels will panic. In which case a pod is yet to be scheduled. So it's safe to just continue here. @@ -179,7 +179,7 @@ func RemovePodsViolatingTopologySpreadConstraint( klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint) continue } - balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes) + balanceDomains(getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes) } } @@ -244,7 +244,6 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol // [5, 5, 5, 5, 5, 5] // (assuming even distribution by the scheduler of the evicted pods) func balanceDomains( - client clientset.Interface, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, podsForEviction map[*v1.Pod]struct{}, constraint v1.TopologySpreadConstraint, @@ -256,6 +255,8 @@ func balanceDomains( idealAvg := sumPods / float64(len(constraintTopologies)) sortedDomains := sortDomains(constraintTopologies, isEvictable) + nodesBelowIdealAvg := filterNodesBelowIdealAvg(nodes, sortedDomains, constraint.TopologyKey, idealAvg) + // i is the index for belowOrEqualAvg // j is the index for aboveAvg i := 0 @@ -306,7 +307,8 @@ func balanceDomains( // In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes. // This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm. // So, a better selection heuristic could improve performance. - if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) { + + if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodesBelowIdealAvg) { klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k])) continue } @@ -318,6 +320,25 @@ func balanceDomains( } } +// filterNodesBelowIdealAvg will return nodes that have fewer pods matching topology domain than the idealAvg count. +// the desired behavior is to not consider nodes in a given topology domain that are already packed. +func filterNodesBelowIdealAvg(nodes []*v1.Node, sortedDomains []topology, topologyKey string, idealAvg float64) []*v1.Node { + topologyNodesMap := make(map[string][]*v1.Node, len(sortedDomains)) + for _, node := range nodes { + if topologyDomain, ok := node.Labels[topologyKey]; ok { + topologyNodesMap[topologyDomain] = append(topologyNodesMap[topologyDomain], node) + } + } + + var nodesBelowIdealAvg []*v1.Node + for _, domain := range sortedDomains { + if float64(len(domain.pods)) < idealAvg { + nodesBelowIdealAvg = append(nodesBelowIdealAvg, topologyNodesMap[domain.pair.value]...) + } + } + return nodesBelowIdealAvg +} + // sortDomains sorts and splits the list of topology domains based on their size // it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order: // 1. non-evictable pods diff --git a/pkg/descheduler/strategies/topologyspreadconstraint_test.go b/pkg/descheduler/strategies/topologyspreadconstraint_test.go index e27fb7645..82db1b916 100644 --- a/pkg/descheduler/strategies/topologyspreadconstraint_test.go +++ b/pkg/descheduler/strategies/topologyspreadconstraint_test.go @@ -936,6 +936,127 @@ func TestTopologySpreadConstraint(t *testing.T) { }, namespaces: []string{"ns1"}, }, + { + name: "3 domains, sizes [2,3,4], maxSkew=1, NodeFit is enabled, and not enough cpu on zoneA; nothing should be moved", + nodes: []*v1.Node{ + test.BuildTestNode("n1", 250, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("n2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + test.BuildTestNode("n3", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }), + }, + pods: createTestPods([]testPodList{ + { + count: 2, + node: "n1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 3, + node: "n2", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 4, + node: "n3", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + }), + expectedEvictedCount: 0, + strategy: api.DeschedulerStrategy{ + Params: &api.StrategyParameters{NodeFit: true}, + }, + namespaces: []string{"ns1"}, + }, + { + name: "3 domains, sizes [[1,0], [1,1], [2,1]], maxSkew=1, NodeFit is enabled, and not enough cpu on ZoneA; nothing should be moved", + nodes: []*v1.Node{ + test.BuildTestNode("A1", 150, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + test.BuildTestNode("C1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }), + test.BuildTestNode("A2", 50, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + test.BuildTestNode("C2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "A1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 1, + node: "B1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 1, + node: "B2", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 2, + node: "C1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 1, + node: "C2", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + }), + expectedEvictedCount: 0, + strategy: api.DeschedulerStrategy{ + Params: &api.StrategyParameters{NodeFit: true}, + }, + namespaces: []string{"ns1"}, + }, + { + name: "2 domains, sizes [[1,4], [2,1]], maxSkew=1, NodeFit is enabled; should move 1", + nodes: []*v1.Node{ + test.BuildTestNode("A1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("A2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }), + test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }), + }, + pods: createTestPods([]testPodList{ + { + count: 1, + node: "A1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 4, + node: "A2", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 2, + node: "B1", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + { + count: 1, + node: "B2", + labels: map[string]string{"foo": "bar"}, + constraints: getDefaultTopologyConstraints(1), + }, + }), + expectedEvictedCount: 1, + strategy: api.DeschedulerStrategy{ + Params: &api.StrategyParameters{NodeFit: true}, + }, + namespaces: []string{"ns1"}, + }, } for _, tc := range testCases {