mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 05:14:13 +01:00
RemoveDuplicates: take node taints, node affinity and node selector into account when computing a number of feasible nodes for the average occurence of pods per node
Nodes with taints which are not tolerated by evicted pods will never run the pods. The same holds for node affinity and node selector. So increase the number of pods per feasible nodes to decrease the number of evicted pods.
This commit is contained in:
@@ -183,9 +183,17 @@ func RemoveDuplicatePods(
|
||||
}
|
||||
|
||||
// 1. how many pods can be evicted to respect uniform placement of pods among viable nodes?
|
||||
for ownerKey, nodes := range duplicatePods {
|
||||
upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(nodeCount)))
|
||||
for nodeName, pods := range nodes {
|
||||
for ownerKey, podNodes := range duplicatePods {
|
||||
targetNodes := getTargetNodes(podNodes, nodes)
|
||||
|
||||
klog.V(2).InfoS("Adjusting feasible nodes", "owner", ownerKey, "from", nodeCount, "to", len(targetNodes))
|
||||
if len(targetNodes) < 2 {
|
||||
klog.V(1).InfoS("Less than two feasible nodes for duplicates to land, skipping eviction", "owner", ownerKey)
|
||||
continue
|
||||
}
|
||||
|
||||
upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
|
||||
for nodeName, pods := range podNodes {
|
||||
klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
|
||||
// list of duplicated pods does not contain the original referential pod
|
||||
if len(pods)+1 > upperAvg {
|
||||
@@ -202,6 +210,73 @@ func RemoveDuplicatePods(
|
||||
}
|
||||
}
|
||||
|
||||
func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector {
|
||||
if pod.Spec.Affinity == nil {
|
||||
return nil
|
||||
}
|
||||
if pod.Spec.Affinity.NodeAffinity == nil {
|
||||
return nil
|
||||
}
|
||||
return pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
||||
}
|
||||
|
||||
func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node {
|
||||
// In order to reduce the number of pods processed, identify pods which have
|
||||
// equal (tolerations, nodeselectors, node affinity) terms and considered them
|
||||
// as identical. Identical pods wrt. (tolerations, nodeselectors, node affinity) terms
|
||||
// will produce the same result when checking if a pod is feasible for a node.
|
||||
// Thus, improving efficiency of processing pods marked for eviction.
|
||||
|
||||
// Collect all distinct pods which differ in at least taints, node affinity or node selector terms
|
||||
distinctPods := map[*v1.Pod]struct{}{}
|
||||
for _, pods := range podNodes {
|
||||
for _, pod := range pods {
|
||||
duplicated := false
|
||||
for dp := range distinctPods {
|
||||
if utils.TolerationsEqual(pod.Spec.Tolerations, dp.Spec.Tolerations) &&
|
||||
utils.NodeSelectorsEqual(getNodeAffinityNodeSelector(pod), getNodeAffinityNodeSelector(dp)) &&
|
||||
reflect.DeepEqual(pod.Spec.NodeSelector, dp.Spec.NodeSelector) {
|
||||
duplicated = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
if duplicated {
|
||||
continue
|
||||
}
|
||||
distinctPods[pod] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// For each distinct pod get a list of nodes where it can land
|
||||
targetNodesMap := map[string]*v1.Node{}
|
||||
for pod := range distinctPods {
|
||||
matchingNodes := map[string]*v1.Node{}
|
||||
for _, node := range nodes {
|
||||
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
}) {
|
||||
continue
|
||||
}
|
||||
if match, err := utils.PodMatchNodeSelector(pod, node); err == nil && !match {
|
||||
continue
|
||||
}
|
||||
matchingNodes[node.Name] = node
|
||||
}
|
||||
if len(matchingNodes) > 1 {
|
||||
for nodeName := range matchingNodes {
|
||||
targetNodesMap[nodeName] = matchingNodes[nodeName]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
targetNodes := []*v1.Node{}
|
||||
for _, node := range targetNodesMap {
|
||||
targetNodes = append(targetNodes, node)
|
||||
}
|
||||
|
||||
return targetNodes
|
||||
}
|
||||
|
||||
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
|
||||
if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil {
|
||||
return false
|
||||
|
||||
@@ -240,6 +240,117 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
setTolerationsK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "k1",
|
||||
Value: "v1",
|
||||
Operator: v1.TolerationOpEqual,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}
|
||||
setTolerationsK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "k2",
|
||||
Value: "v2",
|
||||
Operator: v1.TolerationOpEqual,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setMasterNoScheduleTaint := func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
Key: "node-role.kubernetes.io/master",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setMasterNoScheduleLabel := func(node *v1.Node) {
|
||||
if node.ObjectMeta.Labels == nil {
|
||||
node.ObjectMeta.Labels = map[string]string{}
|
||||
}
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/master"] = ""
|
||||
}
|
||||
|
||||
setWorkerLabel := func(node *v1.Node) {
|
||||
if node.ObjectMeta.Labels == nil {
|
||||
node.ObjectMeta.Labels = map[string]string{}
|
||||
}
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k1"
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k2"
|
||||
}
|
||||
|
||||
setNotMasterNodeSelectorK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Affinity = &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: "node-role.kubernetes.io/master",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
{
|
||||
Key: "k1",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setNotMasterNodeSelectorK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Affinity = &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: "node-role.kubernetes.io/master",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
{
|
||||
Key: "k2",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setWorkerLabelSelectorK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
if pod.Spec.NodeSelector == nil {
|
||||
pod.Spec.NodeSelector = map[string]string{}
|
||||
}
|
||||
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k1"
|
||||
}
|
||||
|
||||
setWorkerLabelSelectorK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
if pod.Spec.NodeSelector == nil {
|
||||
pod.Spec.NodeSelector = map[string]string{}
|
||||
}
|
||||
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k2"
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
maxPodsToEvictPerNode int
|
||||
@@ -393,6 +504,106 @@ func TestRemoveDuplicatesUniformly(t *testing.T) {
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting taints",
|
||||
pods: []v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
*test.BuildTestPod("p1", 100, 0, "worker1", setTolerationsK1),
|
||||
*test.BuildTestPod("p2", 100, 0, "worker1", setTolerationsK2),
|
||||
*test.BuildTestPod("p3", 100, 0, "worker1", setTolerationsK1),
|
||||
*test.BuildTestPod("p4", 100, 0, "worker1", setTolerationsK2),
|
||||
*test.BuildTestPod("p5", 100, 0, "worker1", setTolerationsK1),
|
||||
*test.BuildTestPod("p6", 100, 0, "worker2", setTolerationsK2),
|
||||
*test.BuildTestPod("p7", 100, 0, "worker2", setTolerationsK1),
|
||||
*test.BuildTestPod("p8", 100, 0, "worker2", setTolerationsK2),
|
||||
*test.BuildTestPod("p9", 100, 0, "worker3", setTolerationsK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting RequiredDuringSchedulingIgnoredDuringExecution node affinity",
|
||||
pods: []v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
*test.BuildTestPod("p1", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
*test.BuildTestPod("p2", 100, 0, "worker1", setNotMasterNodeSelectorK2),
|
||||
*test.BuildTestPod("p3", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
*test.BuildTestPod("p4", 100, 0, "worker1", setNotMasterNodeSelectorK2),
|
||||
*test.BuildTestPod("p5", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
*test.BuildTestPod("p6", 100, 0, "worker2", setNotMasterNodeSelectorK2),
|
||||
*test.BuildTestPod("p7", 100, 0, "worker2", setNotMasterNodeSelectorK1),
|
||||
*test.BuildTestPod("p8", 100, 0, "worker2", setNotMasterNodeSelectorK2),
|
||||
*test.BuildTestPod("p9", 100, 0, "worker3", setNotMasterNodeSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting node selector",
|
||||
pods: []v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
*test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting node selector with zero target nodes",
|
||||
pods: []v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
*test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
|
||||
*test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
*test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
|
||||
Reference in New Issue
Block a user