
TopologySpreadConstraint: only evaluate nodes below ideal avg when balancing domains

Author: Amir Alavi
Date: 2022-06-05 14:00:28 -04:00
Parent: b2418ef481
Commit: 469bde0a01
2 changed files with 146 additions and 4 deletions
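
To see the intent with numbers: for domain sizes [5, 5, 12], sumPods is 22 and idealAvg is 22/3 ≈ 7.33, so only the two size-5 domains are under-filled; pods evicted from the size-12 domain only need to fit on nodes in those domains. Below is a minimal, self-contained sketch of that filtering idea (hypothetical names; the actual implementation is the filterNodesBelowIdealAvg helper added in this commit):

package main

import "fmt"

// domainsBelowIdealAvg is an illustrative stand-in for the idea this commit
// introduces: compute the ideal average number of pods per topology domain
// and keep only the domains still under that average. Nodes in the remaining
// (packed) domains are no longer considered when checking whether an
// eviction candidate fits elsewhere.
func domainsBelowIdealAvg(domainSizes map[string]int) []string {
	sum := 0
	for _, n := range domainSizes {
		sum += n
	}
	idealAvg := float64(sum) / float64(len(domainSizes))

	var below []string
	for domain, n := range domainSizes {
		if float64(n) < idealAvg {
			below = append(below, domain)
		}
	}
	return below
}

func main() {
	// idealAvg = (5+5+12)/3 ≈ 7.33, so zoneA and zoneB qualify; zoneC is excluded.
	fmt.Println(domainsBelowIdealAvg(map[string]int{"zoneA": 5, "zoneB": 5, "zoneC": 12}))
}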

pkg/descheduler/strategies/topologyspreadconstraint.go

@@ -159,7 +159,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
continue
}
-// 5. If the pod's node matches this constraint'selector topologyKey, create a topoPair and add the pod
+// 5. If the pod's node matches this constraint's topologyKey, create a topoPair and add the pod
node, ok := nodeMap[namespacePods.Items[i].Spec.NodeName]
if !ok {
// If ok is false, node is nil and node.Labels would panic; it means the pod has not yet been scheduled, so it's safe to just continue here.
@@ -179,7 +179,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
continue
}
-balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
+balanceDomains(getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
}
}
@@ -244,7 +244,6 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
// [5, 5, 5, 5, 5, 5]
// (assuming even distribution by the scheduler of the evicted pods)
func balanceDomains(
-client clientset.Interface,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
podsForEviction map[*v1.Pod]struct{},
constraint v1.TopologySpreadConstraint,
@@ -256,6 +255,8 @@ func balanceDomains(
idealAvg := sumPods / float64(len(constraintTopologies))
sortedDomains := sortDomains(constraintTopologies, isEvictable)
+nodesBelowIdealAvg := filterNodesBelowIdealAvg(nodes, sortedDomains, constraint.TopologyKey, idealAvg)
// i is the index for belowOrEqualAvg
// j is the index for aboveAvg
i := 0
@@ -306,7 +307,8 @@ func balanceDomains(
// In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes.
// This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm.
// So, a better selection heuristic could improve performance.
-if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
+if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodesBelowIdealAvg) {
klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
continue
}
@@ -318,6 +320,25 @@ func balanceDomains(
}
}
// filterNodesBelowIdealAvg returns the nodes whose topology domain contains fewer pods matching the constraint than the idealAvg count.
// The desired behavior is to not consider nodes in a topology domain that is already packed.
func filterNodesBelowIdealAvg(nodes []*v1.Node, sortedDomains []topology, topologyKey string, idealAvg float64) []*v1.Node {
topologyNodesMap := make(map[string][]*v1.Node, len(sortedDomains))
for _, node := range nodes {
if topologyDomain, ok := node.Labels[topologyKey]; ok {
topologyNodesMap[topologyDomain] = append(topologyNodesMap[topologyDomain], node)
}
}
var nodesBelowIdealAvg []*v1.Node
for _, domain := range sortedDomains {
if float64(len(domain.pods)) < idealAvg {
nodesBelowIdealAvg = append(nodesBelowIdealAvg, topologyNodesMap[domain.pair.value]...)
}
}
return nodesBelowIdealAvg
}
// sortDomains sorts and splits the list of topology domains based on their size
// it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
// 1. non-evictable pods
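
Because a topology domain can span several nodes, the new helper first groups nodes by the value of the constraint's topology key and then collects the nodes of every under-filled domain. Here is a self-contained sketch of that grouping step, using plain stand-in types instead of the package-internal topology structs (names are illustrative):

package main

import "fmt"

// Stand-ins for *v1.Node and the package's topology type, for illustration.
type node struct {
	name   string
	labels map[string]string
}

type domain struct {
	value    string // the topology key's value, e.g. "zoneA"
	podCount int
}

// nodesInUnderfilledDomains mirrors filterNodesBelowIdealAvg: group nodes by
// topology key, then keep only nodes whose domain is below the ideal average.
// The fit check in balanceDomains is then run against this reduced node set.
func nodesInUnderfilledDomains(nodes []*node, domains []domain, topologyKey string, idealAvg float64) []*node {
	byDomain := make(map[string][]*node, len(domains))
	for _, n := range nodes {
		if v, ok := n.labels[topologyKey]; ok {
			byDomain[v] = append(byDomain[v], n)
		}
	}
	var out []*node
	for _, d := range domains {
		if float64(d.podCount) < idealAvg {
			out = append(out, byDomain[d.value]...)
		}
	}
	return out
}

func main() {
	nodes := []*node{
		{name: "A1", labels: map[string]string{"zone": "zoneA"}},
		{name: "A2", labels: map[string]string{"zone": "zoneA"}},
		{name: "B1", labels: map[string]string{"zone": "zoneB"}},
	}
	// Domain sizes [1, 3]: idealAvg = 2, so only zoneA's two nodes remain
	// candidates for receiving evicted pods.
	for _, n := range nodesInUnderfilledDomains(nodes, []domain{{"zoneA", 1}, {"zoneB", 3}}, "zone", 2) {
		fmt.Println(n.name)
	}
}

This is also what the PodFitsAnyOtherNode change above buys: a candidate pod that only fits on nodes in an already-packed domain is now skipped rather than evicted.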

pkg/descheduler/strategies/topologyspreadconstraint_test.go

@@ -936,6 +936,127 @@ func TestTopologySpreadConstraint(t *testing.T) {
},
namespaces: []string{"ns1"},
},
{
name: "3 domains, sizes [2,3,4], maxSkew=1, NodeFit is enabled, and not enough cpu on zoneA; nothing should be moved",
nodes: []*v1.Node{
test.BuildTestNode("n1", 250, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("n2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
test.BuildTestNode("n3", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
},
pods: createTestPods([]testPodList{
{
count: 2,
node: "n1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 3,
node: "n2",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 4,
node: "n3",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{NodeFit: true},
},
namespaces: []string{"ns1"},
},
{
name: "3 domains, sizes [[1,0], [1,1], [2,1]], maxSkew=1, NodeFit is enabled, and not enough cpu on ZoneA; nothing should be moved",
nodes: []*v1.Node{
test.BuildTestNode("A1", 150, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
test.BuildTestNode("C1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
test.BuildTestNode("A2", 50, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
test.BuildTestNode("C2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "A1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "B1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "B2",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 2,
node: "C1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "C2",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
}),
expectedEvictedCount: 0,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{NodeFit: true},
},
namespaces: []string{"ns1"},
},
{
name: "2 domains, sizes [[1,4], [2,1]], maxSkew=1, NodeFit is enabled; should move 1",
nodes: []*v1.Node{
test.BuildTestNode("A1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("A2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
},
pods: createTestPods([]testPodList{
{
count: 1,
node: "A1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 4,
node: "A2",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 2,
node: "B1",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
{
count: 1,
node: "B2",
labels: map[string]string{"foo": "bar"},
constraints: getDefaultTopologyConstraints(1),
},
}),
expectedEvictedCount: 1,
strategy: api.DeschedulerStrategy{
Params: &api.StrategyParameters{NodeFit: true},
},
namespaces: []string{"ns1"},
},
}
for _, tc := range testCases {
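
For intuition on the expected counts: in the last case the domain sizes are zoneA = 1 + 4 = 5 and zoneB = 2 + 1 = 3, so idealAvg = 8/2 = 4 and the fit check only consults zoneB's nodes B1 and B2, which have spare capacity, so exactly one pod is evicted. In the first two cases only zoneA sits below the average, and its nodes lack CPU, so nothing moves. A run along these lines should exercise the new cases (the package path is an assumption inferred from the function names; adjust to the repo layout):

go test ./pkg/descheduler/strategies/... -run TestTopologySpreadConstraint -v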