From 84d648ff604a1b0d6b210cbf6c7dddba0ed3b639 Mon Sep 17 00:00:00 2001
From: Amir Alavi
Date: Thu, 8 Jul 2021 23:36:02 -0400
Subject: [PATCH 1/2] Add e2e tests for TopologySpreadConstraint

---
 hack/kind_config.yaml                         |  12 ++
 test/e2e/e2e_test.go                          |  27 +--
 test/e2e/e2e_topologyspreadconstraint_test.go | 170 ++++++++++++++++++
 3 files changed, 198 insertions(+), 11 deletions(-)
 create mode 100644 test/e2e/e2e_topologyspreadconstraint_test.go

diff --git a/hack/kind_config.yaml b/hack/kind_config.yaml
index 752e993cd..c4057022b 100644
--- a/hack/kind_config.yaml
+++ b/hack/kind_config.yaml
@@ -3,4 +3,16 @@ apiVersion: kind.x-k8s.io/v1alpha4
 nodes:
 - role: control-plane
 - role: worker
+  kubeadmConfigPatches:
+  - |
+    kind: JoinConfiguration
+    nodeRegistration:
+      kubeletExtraArgs:
+        node-labels: "topology.kubernetes.io/zone=local-a"
 - role: worker
+  kubeadmConfigPatches:
+  - |
+    kind: JoinConfiguration
+    nodeRegistration:
+      kubeletExtraArgs:
+        node-labels: "topology.kubernetes.io/zone=local-b"
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index db599dad3..9a3008f2a 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -62,11 +62,11 @@ func MakePodSpec(priorityClassName string, gracePeriod *int64) v1.PodSpec {
 			Resources: v1.ResourceRequirements{
 				Limits: v1.ResourceList{
 					v1.ResourceCPU:    resource.MustParse("100m"),
-					v1.ResourceMemory: resource.MustParse("1000Mi"),
+					v1.ResourceMemory: resource.MustParse("200Mi"),
 				},
 				Requests: v1.ResourceList{
 					v1.ResourceCPU:    resource.MustParse("100m"),
-					v1.ResourceMemory: resource.MustParse("800Mi"),
+					v1.ResourceMemory: resource.MustParse("100Mi"),
 				},
 			},
 		}},
@@ -209,15 +209,7 @@ func TestLowNodeUtilization(t *testing.T) {
 		t.Errorf("Error listing node with %v", err)
 	}
 
-	var nodes []*v1.Node
-	var workerNodes []*v1.Node
-	for i := range nodeList.Items {
-		node := nodeList.Items[i]
-		nodes = append(nodes, &node)
-		if _, exists := node.Labels["node-role.kubernetes.io/master"]; !exists {
-			workerNodes = append(workerNodes, &node)
-		}
-	}
+	nodes, workerNodes := splitNodesAndWorkerNodes(nodeList.Items)
 
 	t.Log("Creating testing namespace")
 	testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
@@ -1288,3 +1280,16 @@ func waitForPodRunning(ctx context.Context, t *testing.T, clientSet clientset.In
 		t.Fatalf("Error waiting for pod running: %v", err)
 	}
 }
+
+func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) {
+	var allNodes []*v1.Node
+	var workerNodes []*v1.Node
+	for i := range nodes {
+		node := nodes[i]
+		allNodes = append(allNodes, &node)
+		if _, exists := node.Labels["node-role.kubernetes.io/master"]; !exists {
+			workerNodes = append(workerNodes, &node)
+		}
+	}
+	return allNodes, workerNodes
+}
diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go
new file mode 100644
index 000000000..ad814e027
--- /dev/null
+++ b/test/e2e/e2e_topologyspreadconstraint_test.go
@@ -0,0 +1,170 @@
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"strings"
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	deschedulerapi "sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
+	eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
+	"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
+)
+
+const zoneTopologyKey string = "topology.kubernetes.io/zone"
+
+func TestTopologySpreadConstraint(t *testing.T) {
+	ctx := context.Background()
+	clientSet, _, stopCh := initializeClient(t)
+	defer close(stopCh)
+	nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+	if err != nil {
+		t.Errorf("Error listing node with %v", err)
+	}
+	nodes, workerNodes := splitNodesAndWorkerNodes(nodeList.Items)
+	t.Log("Creating testing namespace")
+	testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
+	if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
+		t.Fatalf("Unable to create ns %v", testNamespace.Name)
+	}
+	defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
+
+	testCases := map[string]struct {
+		replicaCount int
+		maxSkew      int
+		labelKey     string
+		labelValue   string
+		constraint   v1.UnsatisfiableConstraintAction
+	}{
+		"test-rc-topology-spread-hard-constraint": {
+			replicaCount: 4,
+			maxSkew:      1,
+			labelKey:     "test",
+			labelValue:   "topology-spread-hard-constraint",
+			constraint:   v1.DoNotSchedule,
+		},
+		"test-rc-topology-spread-soft-constraint": {
+			replicaCount: 4,
+			maxSkew:      1,
+			labelKey:     "test",
+			labelValue:   "topology-spread-soft-constraint",
+			constraint:   v1.ScheduleAnyway,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			t.Logf("Creating RC %s with %d replicas", name, tc.replicaCount)
+			rc := RcByNameContainer(name, testNamespace.Name, int32(tc.replicaCount), map[string]string{tc.labelKey: tc.labelValue}, nil, "")
+			rc.Spec.Template.Spec.TopologySpreadConstraints = makeTopologySpreadConstraints(tc.maxSkew, tc.labelKey, tc.labelValue, tc.constraint)
+			if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil {
+				t.Fatalf("Error creating RC %s: %v", name, err)
+			}
+			defer deleteRC(ctx, t, clientSet, rc)
+			waitForRCPodsRunning(ctx, t, clientSet, rc)
+
+			// Create a "Violator" RC that has the same label and is forced to be on the same node using a nodeSelector
+			violatorRcName := name + "-violator"
+			violatorCount := tc.maxSkew + 1
+			violatorRc := RcByNameContainer(violatorRcName, testNamespace.Name, int32(violatorCount), map[string]string{tc.labelKey: tc.labelValue}, nil, "")
+			violatorRc.Spec.Template.Spec.NodeSelector = map[string]string{zoneTopologyKey: workerNodes[0].Labels[zoneTopologyKey]}
+			rc.Spec.Template.Spec.TopologySpreadConstraints = makeTopologySpreadConstraints(tc.maxSkew, tc.labelKey, tc.labelValue, tc.constraint)
+			if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, violatorRc, metav1.CreateOptions{}); err != nil {
+				t.Fatalf("Error creating RC %s: %v", violatorRcName, err)
+			}
+			defer deleteRC(ctx, t, clientSet, violatorRc)
+			waitForRCPodsRunning(ctx, t, clientSet, violatorRc)
+
+			// Run TopologySpreadConstraint strategy
+			evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet)
+			if err != nil || len(evictionPolicyGroupVersion) == 0 {
+				t.Fatalf("Error creating eviction policy group for %s: %v", name, err)
+			}
+			podEvictor := evictions.NewPodEvictor(
+				clientSet,
+				evictionPolicyGroupVersion,
+				false,
+				0,
+				nodes,
+				true,
+				false,
+				false,
+			)
+
+			t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)
+			strategies.RemovePodsViolatingTopologySpreadConstraint(
+				ctx,
+				clientSet,
+				deschedulerapi.DeschedulerStrategy{
+					Enabled: true,
+					Params: &deschedulerapi.StrategyParameters{
+						IncludeSoftConstraints: tc.constraint != v1.DoNotSchedule,
+					},
+				},
+				nodes,
+				podEvictor,
+			)
+			t.Logf("Finished RemovePodsViolatingTopologySpreadConstraint strategy for %s", name)
+
+			t.Logf("Wait for terminating pods of %s to disappear", name)
+			waitForTerminatingPodsToDisappear(ctx, t, clientSet, rc.Namespace)
+
+			if totalEvicted := podEvictor.TotalEvicted(); totalEvicted > 0 {
+				t.Logf("Total of %d Pods were evicted for %s", totalEvicted, name)
+			} else {
+				t.Fatalf("Pods were not evicted for %s TopologySpreadConstraint", name)
+			}
+
+			pods, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", tc.labelKey, tc.labelValue)})
+			if err != nil {
+				t.Errorf("Error listing pods for %s: %v", name, err)
+			}
+
+			nodePodCountMap := make(map[string]int)
+			for _, pod := range pods.Items {
+				nodePodCountMap[pod.Spec.NodeName]++
+			}
+
+			if len(nodePodCountMap) != len(workerNodes) {
+				t.Errorf("%s Pods were scheduled on only '%d' nodes and were not properly distributed on the nodes", name, len(nodePodCountMap))
+			}
+
+			min, max := getMinAndMaxPodDistribution(nodePodCountMap)
+			if max-min > tc.maxSkew {
+				t.Errorf("Pod distribution for %s is still violating the max skew of %d as it is %d", name, tc.maxSkew, max-min)
+			}
+
+			t.Logf("Pods for %s were distributed in line with max skew of %d", name, tc.maxSkew)
+		})
+	}
+}
+
+func makeTopologySpreadConstraints(maxSkew int, labelKey, labelValue string, constraint v1.UnsatisfiableConstraintAction) []v1.TopologySpreadConstraint {
+	return []v1.TopologySpreadConstraint{
+		{
+			MaxSkew:           int32(maxSkew),
+			TopologyKey:       zoneTopologyKey,
+			WhenUnsatisfiable: constraint,
+			LabelSelector:     &metav1.LabelSelector{MatchLabels: map[string]string{labelKey: labelValue}},
+		},
+	}
+}
+
+func getMinAndMaxPodDistribution(nodePodCountMap map[string]int) (int, int) {
+	min := math.MaxInt32
+	max := math.MinInt32
+	for _, podCount := range nodePodCountMap {
+		if podCount < min {
+			min = podCount
+		}
+		if podCount > max {
+			max = podCount
+		}
+	}
+
+	return min, max
+}

From 0de8002b7d198cbbf212339f57405b2e8d33be91 Mon Sep 17 00:00:00 2001
From: Amir Alavi
Date: Fri, 9 Jul 2021 11:34:29 -0400
Subject: [PATCH 2/2] Update gce scripts to spread nodes over 2 zones

---
 hack/e2e-gce/gcloud_create_cluster.sh | 19 +++++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/hack/e2e-gce/gcloud_create_cluster.sh b/hack/e2e-gce/gcloud_create_cluster.sh
index a9c8c62d1..fd68c1fad 100755
--- a/hack/e2e-gce/gcloud_create_cluster.sh
+++ b/hack/e2e-gce/gcloud_create_cluster.sh
@@ -26,7 +26,7 @@ create_cluster() {
   echo "gcloud compute instances delete descheduler-$node1_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
 
-  gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
-  echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
+  gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-c
+  echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-c --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
 
   # Delete the firewall port created for master.
echo "gcloud compute firewall-rules delete kubeapiserver-$master_uuid --quiet" >> $E2E_GCE_HOME/delete_cluster.sh @@ -44,10 +44,10 @@ generate_kubeadm_instance_files() { transfer_install_files() { - gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b + gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b gcloud compute scp $E2E_GCE_HOME/kubeadm_install.sh descheduler-$master_uuid:/tmp --zone=us-east1-b - gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b - gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b + gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b + gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c } @@ -55,7 +55,7 @@ install_kube() { # Docker installation. gcloud compute ssh descheduler-$master_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b gcloud compute ssh descheduler-$node1_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b - gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b + gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-c # kubeadm installation. # 1. Transfer files to master, nodes. transfer_install_files @@ -81,10 +81,9 @@ install_kube() { gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b - gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b - gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b - gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b - + gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-c + gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c + gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-c }