Mirror of https://github.com/kubernetes-sigs/descheduler.git (synced 2026-01-26 05:14:13 +01:00)

Merge pull request #836 from a7i/balancedomains-belowavg

TopologySpreadConstraint: only evaluate nodes below ideal avg when balancing domains
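In summary, this commit drops the client clientset.Interface parameter from balanceDomains (and the matching argument at its call site), fixes a comment typo, and narrows the fit check so that PodFitsAnyOtherNode only considers nodes whose topology domain currently holds fewer pods than the ideal average, via the new filterNodesBelowIdealAvg helper. The strategy's test gains an expectedEvictedPods assertion, several NodeFit cases, and a regression case for issue #839.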
@@ -159,7 +159,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
             continue
         }

-        // 5. If the pod's node matches this constraint'selector topologyKey, create a topoPair and add the pod
+        // 5. If the pod's node matches this constraint's topologyKey, create a topoPair and add the pod
         node, ok := nodeMap[namespacePods.Items[i].Spec.NodeName]
         if !ok {
             // If ok is false, node is nil in which case node.Labels will panic. In which case a pod is yet to be scheduled. So it's safe to just continue here.
@@ -179,7 +179,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
             klog.V(2).InfoS("Skipping topology constraint because it is already balanced", "constraint", constraint)
             continue
         }
-        balanceDomains(client, getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
+        balanceDomains(getPodsAssignedToNode, podsForEviction, constraint, constraintTopologies, sumPods, evictorFilter.Filter, nodes)
     }
 }

@@ -244,7 +244,6 @@ func topologyIsBalanced(topology map[topologyPair][]*v1.Pod, constraint v1.Topol
 // [5, 5, 5, 5, 5, 5]
 // (assuming even distribution by the scheduler of the evicted pods)
 func balanceDomains(
-    client clientset.Interface,
     getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
     podsForEviction map[*v1.Pod]struct{},
     constraint v1.TopologySpreadConstraint,
@@ -256,6 +255,8 @@ func balanceDomains(
     idealAvg := sumPods / float64(len(constraintTopologies))
     sortedDomains := sortDomains(constraintTopologies, isEvictable)

+    nodesBelowIdealAvg := filterNodesBelowIdealAvg(nodes, sortedDomains, constraint.TopologyKey, idealAvg)
+
     // i is the index for belowOrEqualAvg
     // j is the index for aboveAvg
     i := 0
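For intuition on the cutoff introduced above, here is a standalone toy calculation (not descheduler code; the per-domain pod counts are assumed values): with counts of 5, 3 and 1, sumPods is 9 and idealAvg is 3.0, so only the domain holding 1 pod is strictly below the average.

package main

import "fmt"

func main() {
    // Assumed per-domain pod counts, for illustration only.
    domainSizes := []int{5, 3, 1}

    sumPods := 0.0
    for _, n := range domainSizes {
        sumPods += float64(n)
    }
    idealAvg := sumPods / float64(len(domainSizes)) // 9 / 3 = 3.0

    for _, n := range domainSizes {
        // Mirrors the strict "< idealAvg" comparison used by the new helper below.
        fmt.Printf("domain with %d pods below ideal avg %.1f? %t\n", n, idealAvg, float64(n) < idealAvg)
    }
}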
@@ -306,7 +307,8 @@ func balanceDomains(
             // In other words, PTS can perform suboptimally if some of its chosen pods don't fit on other nodes.
             // This is because the chosen pods aren't sorted, but immovable pods still count as "evicted" toward the PTS algorithm.
             // So, a better selection heuristic could improve performance.
-            if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodes) {
+
+            if !node.PodFitsAnyOtherNode(getPodsAssignedToNode, aboveToEvict[k], nodesBelowIdealAvg) {
                 klog.V(2).InfoS("ignoring pod for eviction as it does not fit on any other node", "pod", klog.KObj(aboveToEvict[k]))
                 continue
             }
@@ -318,6 +320,25 @@ func balanceDomains(
     }
 }

+// filterNodesBelowIdealAvg will return nodes that have fewer pods matching topology domain than the idealAvg count.
+// the desired behavior is to not consider nodes in a given topology domain that are already packed.
+func filterNodesBelowIdealAvg(nodes []*v1.Node, sortedDomains []topology, topologyKey string, idealAvg float64) []*v1.Node {
+    topologyNodesMap := make(map[string][]*v1.Node, len(sortedDomains))
+    for _, node := range nodes {
+        if topologyDomain, ok := node.Labels[topologyKey]; ok {
+            topologyNodesMap[topologyDomain] = append(topologyNodesMap[topologyDomain], node)
+        }
+    }
+
+    var nodesBelowIdealAvg []*v1.Node
+    for _, domain := range sortedDomains {
+        if float64(len(domain.pods)) < idealAvg {
+            nodesBelowIdealAvg = append(nodesBelowIdealAvg, topologyNodesMap[domain.pair.value]...)
+        }
+    }
+    return nodesBelowIdealAvg
+}
+
 // sortDomains sorts and splits the list of topology domains based on their size
 // it also sorts the list of pods within the domains based on their node affinity/selector and priority in the following order:
 // 1. non-evictable pods
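The hunks that follow update the strategy's test, TestTopologySpreadConstraint. Before moving on, here is a self-contained sketch of the node-filtering idea above; the domain struct, the node-to-zone mapping, and the sample counts are stand-ins for illustration, not descheduler's internal types:

package main

import "fmt"

// domain is a simplified stand-in for the strategy's internal topology struct.
type domain struct {
    value string // topology domain value, e.g. a zone name
    pods  int    // number of matching pods currently in the domain
}

// nodesBelowIdealAvg keeps only node names whose domain holds fewer pods than the ideal average.
func nodesBelowIdealAvg(nodeZones map[string]string, domains []domain, idealAvg float64) []string {
    zoneNodes := make(map[string][]string)
    for node, zone := range nodeZones {
        zoneNodes[zone] = append(zoneNodes[zone], node)
    }
    var out []string
    for _, d := range domains {
        if float64(d.pods) < idealAvg {
            out = append(out, zoneNodes[d.value]...)
        }
    }
    return out
}

func main() {
    // Assumed sample topology: three nodes, one per zone.
    nodeZones := map[string]string{"n1": "zoneA", "n2": "zoneB", "n3": "zoneC"}
    domains := []domain{{"zoneA", 5}, {"zoneB", 3}, {"zoneC", 1}}
    idealAvg := float64(5+3+1) / 3 // 3.0

    // Only n3 (zoneC, 1 pod < 3.0) remains a candidate target for evicted pods.
    fmt.Println(nodesBelowIdealAvg(nodeZones, domains, idealAvg))
}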
@@ -6,10 +6,13 @@ import (
     "testing"

     v1 "k8s.io/api/core/v1"
+    policy "k8s.io/api/policy/v1beta1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime"
+    "k8s.io/apimachinery/pkg/util/sets"
     "k8s.io/client-go/informers"
     "k8s.io/client-go/kubernetes/fake"
+    core "k8s.io/client-go/testing"

     "sigs.k8s.io/descheduler/pkg/api"
     "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -22,6 +25,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
         name                 string
         pods                 []*v1.Pod
         expectedEvictedCount uint
+        expectedEvictedPods  []string // if specified, will assert specific pods were evicted
         nodes                []*v1.Node
         strategy             api.DeschedulerStrategy
         namespaces           []string
@@ -642,6 +646,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
                 },
             }),
             expectedEvictedCount: 5,
+            expectedEvictedPods: []string{"pod-5", "pod-6", "pod-7", "pod-8"},
             strategy: api.DeschedulerStrategy{},
             namespaces: []string{"ns1"},
         },
@@ -714,11 +719,12 @@ func TestTopologySpreadConstraint(t *testing.T) {
                 },
             }),
             expectedEvictedCount: 1,
+            expectedEvictedPods: []string{"pod-0"},
             strategy: api.DeschedulerStrategy{},
             namespaces: []string{"ns1"},
         },
         {
-            name: "2 domains, sizes [2,0], maxSkew=1, move 1 pods since pod does not tolerate the tainted node",
+            name: "2 domains, sizes [2,0], maxSkew=1, move 0 pods since pod does not tolerate the tainted node",
             nodes: []*v1.Node{
                 test.BuildTestNode("n1", 2000, 3000, 10, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
                 test.BuildTestNode("n2", 2000, 3000, 10, func(n *v1.Node) {
@@ -817,6 +823,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
                 },
             }),
             expectedEvictedCount: 1,
+            expectedEvictedPods: []string{"pod-0"},
             strategy: api.DeschedulerStrategy{},
             namespaces: []string{"ns1"},
         },
@@ -867,6 +874,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
                 },
             }),
             expectedEvictedCount: 1,
+            expectedEvictedPods: []string{"pod-1"},
             strategy: api.DeschedulerStrategy{
                 Params: &api.StrategyParameters{
                     LabelSelector: getLabelSelector("foo", []string{"bar"}, metav1.LabelSelectorOpIn),
@@ -894,6 +902,7 @@ func TestTopologySpreadConstraint(t *testing.T) {
                 },
             }),
             expectedEvictedCount: 1,
+            expectedEvictedPods: []string{"pod-1"},
             strategy: api.DeschedulerStrategy{
                 Params: &api.StrategyParameters{
                     LabelSelector: getLabelSelector("foo", []string{"baz"}, metav1.LabelSelectorOpNotIn),
@@ -936,6 +945,234 @@ func TestTopologySpreadConstraint(t *testing.T) {
             },
             namespaces: []string{"ns1"},
         },
+        {
+            name: "3 domains, sizes [2,3,4], maxSkew=1, NodeFit is enabled, and not enough cpu on zoneA; nothing should be moved",
+            nodes: []*v1.Node{
+                test.BuildTestNode("n1", 250, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+                test.BuildTestNode("n2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+                test.BuildTestNode("n3", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+            },
+            pods: createTestPods([]testPodList{
+                {
+                    count: 2,
+                    node: "n1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 3,
+                    node: "n2",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 4,
+                    node: "n3",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+            }),
+            expectedEvictedCount: 0,
+            strategy: api.DeschedulerStrategy{
+                Params: &api.StrategyParameters{NodeFit: true},
+            },
+            namespaces: []string{"ns1"},
+        },
+        {
+            name: "3 domains, sizes [[1,0], [1,1], [2,1]], maxSkew=1, NodeFit is enabled, and not enough cpu on ZoneA; nothing should be moved",
+            nodes: []*v1.Node{
+                test.BuildTestNode("A1", 150, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+                test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+                test.BuildTestNode("C1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+                test.BuildTestNode("A2", 50, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+                test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+                test.BuildTestNode("C2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneC" }),
+            },
+            pods: createTestPods([]testPodList{
+                {
+                    count: 1,
+                    node: "A1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 1,
+                    node: "B1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 1,
+                    node: "B2",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 2,
+                    node: "C1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 1,
+                    node: "C2",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+            }),
+            expectedEvictedCount: 0,
+            strategy: api.DeschedulerStrategy{
+                Params: &api.StrategyParameters{NodeFit: true},
+            },
+            namespaces: []string{"ns1"},
+        },
+        {
+            name: "2 domains, sizes [[1,4], [2,1]], maxSkew=1, NodeFit is enabled; should move 1",
+            nodes: []*v1.Node{
+                test.BuildTestNode("A1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+                test.BuildTestNode("A2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneA" }),
+                test.BuildTestNode("B1", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+                test.BuildTestNode("B2", 1000, 2000, 9, func(n *v1.Node) { n.Labels["zone"] = "zoneB" }),
+            },
+            pods: createTestPods([]testPodList{
+                {
+                    count: 1,
+                    node: "A1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 4,
+                    node: "A2",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 2,
+                    node: "B1",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+                {
+                    count: 1,
+                    node: "B2",
+                    labels: map[string]string{"foo": "bar"},
+                    constraints: getDefaultTopologyConstraints(1),
+                },
+            }),
+            expectedEvictedCount: 1,
+            expectedEvictedPods: []string{"pod-4"},
+            strategy: api.DeschedulerStrategy{
+                Params: &api.StrategyParameters{NodeFit: true},
+            },
+            namespaces: []string{"ns1"},
+        },
+        {
+            // https://github.com/kubernetes-sigs/descheduler/issues/839
+            name: "2 domains, sizes [3, 1], maxSkew=1, Tainted nodes; nothing should be moved",
+            nodes: []*v1.Node{
+                test.BuildTestNode("controlplane1", 2000, 3000, 10, func(n *v1.Node) {
+                    n.Labels["zone"] = "zoneA"
+                    n.Labels["kubernetes.io/hostname"] = "cp-1"
+                    n.Spec.Taints = []v1.Taint{
+                        {
+                            Effect: v1.TaintEffectNoSchedule,
+                            Key: "node-role.kubernetes.io/controlplane",
+                            Value: "true",
+                        },
+                        {
+                            Effect: v1.TaintEffectNoSchedule,
+                            Key: "node-role.kubernetes.io/etcd",
+                            Value: "true",
+                        },
+                    }
+                }),
+                test.BuildTestNode("infraservices1", 2000, 3000, 10, func(n *v1.Node) {
+                    n.Labels["zone"] = "zoneA"
+                    n.Labels["infra_service"] = "true"
+                    n.Labels["kubernetes.io/hostname"] = "infra-1"
+                    n.Spec.Taints = []v1.Taint{
+                        {
+                            Effect: v1.TaintEffectNoSchedule,
+                            Key: "infra_service",
+                        },
+                    }
+                }),
+                test.BuildTestNode("app1", 2000, 3000, 10, func(n *v1.Node) {
+                    n.Labels["zone"] = "zoneA"
+                    n.Labels["app_service"] = "true"
+                    n.Labels["kubernetes.io/hostname"] = "app-1"
+                }),
+                test.BuildTestNode("app2", 2000, 3000, 10, func(n *v1.Node) {
+                    n.Labels["zone"] = "zoneB"
+                    n.Labels["app_service"] = "true"
+                    n.Labels["kubernetes.io/hostname"] = "app-2"
+                }),
+            },
+            pods: createTestPods([]testPodList{
+                {
+                    count: 2,
+                    node: "app1",
+                    labels: map[string]string{"app.kubernetes.io/name": "service"},
+                    constraints: []v1.TopologySpreadConstraint{
+                        {
+                            MaxSkew: 1,
+                            TopologyKey: "kubernetes.io/hostname",
+                            WhenUnsatisfiable: v1.DoNotSchedule,
+                            LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "service"}},
+                        },
+                    },
+                    nodeAffinity: &v1.Affinity{
+                        NodeAffinity: &v1.NodeAffinity{
+                            RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+                                NodeSelectorTerms: []v1.NodeSelectorTerm{
+                                    {
+                                        MatchExpressions: []v1.NodeSelectorRequirement{
+                                            {
+                                                Key: "app_service",
+                                                Operator: v1.NodeSelectorOpExists,
+                                            },
+                                        },
+                                    },
+                                },
+                            },
+                        },
+                    },
+                },
+                {
+                    count: 2,
+                    node: "app2",
+                    labels: map[string]string{"app.kubernetes.io/name": "service"},
+                    constraints: []v1.TopologySpreadConstraint{
+                        {
+                            MaxSkew: 1,
+                            TopologyKey: "kubernetes.io/hostname",
+                            WhenUnsatisfiable: v1.DoNotSchedule,
+                            LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "service"}},
+                        },
+                    },
+                    nodeAffinity: &v1.Affinity{
+                        NodeAffinity: &v1.NodeAffinity{
+                            RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+                                NodeSelectorTerms: []v1.NodeSelectorTerm{
+                                    {
+                                        MatchExpressions: []v1.NodeSelectorRequirement{
+                                            {
+                                                Key: "app_service",
+                                                Operator: v1.NodeSelectorOpExists,
+                                            },
+                                        },
+                                    },
+                                },
+                            },
+                        },
+                    },
+                },
+            }),
+            expectedEvictedCount: 0,
+            strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
+            namespaces: []string{"ns1"},
+        },
     }

     for _, tc := range testCases {
@@ -964,6 +1201,20 @@ func TestTopologySpreadConstraint(t *testing.T) {
             sharedInformerFactory.Start(ctx.Done())
             sharedInformerFactory.WaitForCacheSync(ctx.Done())

+            var evictedPods []string
+            fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
+                if action.GetSubresource() == "eviction" {
+                    createAct, matched := action.(core.CreateActionImpl)
+                    if !matched {
+                        return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
+                    }
+                    if eviction, matched := createAct.Object.(*policy.Eviction); matched {
+                        evictedPods = append(evictedPods, eviction.GetName())
+                    }
+                }
+                return false, nil, nil // fallback to the default reactor
+            })
+
             podEvictor := evictions.NewPodEvictor(
                 fakeClient,
                 "v1",
@@ -994,6 +1245,18 @@ func TestTopologySpreadConstraint(t *testing.T) {
             if podsEvicted != tc.expectedEvictedCount {
                 t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", tc.name, tc.expectedEvictedCount, podsEvicted)
             }
+
+            if tc.expectedEvictedPods != nil {
+                diff := sets.NewString(tc.expectedEvictedPods...).Difference(sets.NewString(evictedPods...))
+                if diff.Len() > 0 {
+                    t.Errorf(
+                        "Expected pods %v to be evicted but %v were not evicted. Actual pods evicted: %v",
+                        tc.expectedEvictedPods,
+                        diff.List(),
+                        evictedPods,
+                    )
+                }
+            }
         })
     }
 }
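Note that the expectedEvictedPods assertion above is a subset check rather than an equality check: it only verifies that every listed name actually appears among the evicted pods, which is why a case may list fewer names than expectedEvictedCount. A minimal standalone illustration, with assumed pod names:

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/util/sets"
)

func main() {
    // Assumed sample values for illustration.
    expectedEvictedPods := []string{"pod-5", "pod-6"}
    evictedPods := []string{"pod-5", "pod-6", "pod-7"}

    // Difference returns the expected names that were NOT evicted; empty means the check passes.
    missing := sets.NewString(expectedEvictedPods...).Difference(sets.NewString(evictedPods...))
    fmt.Println(missing.Len() == 0, missing.List()) // true []
}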