diff --git a/pkg/rescheduler/strategies/duplicates_test.go b/pkg/rescheduler/strategies/duplicates_test.go
index 3751c2909..27a07744b 100644
--- a/pkg/rescheduler/strategies/duplicates_test.go
+++ b/pkg/rescheduler/strategies/duplicates_test.go
@@ -60,7 +60,7 @@ func buildTestPod(name string, cpu int64, memory int64, nodeName string) *v1.Pod
 }
 
 // buildTestNode creates a node with specified capacity.
-func buildTestNode(name string, millicpu int64, mem int64) *v1.Node {
+func buildTestNode(name string, millicpu int64, mem int64, pods int64) *v1.Node {
 	node := &v1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: name,
@@ -69,12 +69,12 @@ func buildTestNode(name string, millicpu int64, mem int64) *v1.Node {
 		},
 		Status: v1.NodeStatus{
 			Capacity: v1.ResourceList{
-				v1.ResourcePods:   *resource.NewQuantity(100, resource.DecimalSI),
+				v1.ResourcePods:   *resource.NewQuantity(pods, resource.DecimalSI),
 				v1.ResourceCPU:    *resource.NewMilliQuantity(millicpu, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(mem, resource.DecimalSI),
 			},
 			Allocatable: v1.ResourceList{
-				v1.ResourcePods:   *resource.NewQuantity(100, resource.DecimalSI),
+				v1.ResourcePods:   *resource.NewQuantity(pods, resource.DecimalSI),
 				v1.ResourceCPU:    *resource.NewMilliQuantity(millicpu, resource.DecimalSI),
 				v1.ResourceMemory: *resource.NewQuantity(mem, resource.DecimalSI),
 			},
@@ -96,7 +96,7 @@ func getMirrorPodAnnotation() map[string]string {
 	}
 }
 
-// getNormalPodAnnotation returns the annotation needed for a normal pod. A normal pod is one without any references to
+// getNormalPodAnnotation returns the annotation needed for a pod.
 func getNormalPodAnnotation() map[string]string {
 	return map[string]string{
 		"kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"Pod\"}}",
@@ -117,7 +117,7 @@ func getDaemonSetAnnotation() map[string]string {
 	}
 }
 
-// getCriticalPodAnnotation returns the annotation needed for daemonset pod.
+// getCriticalPodAnnotation returns the annotation needed for a critical pod.
 func getCriticalPodAnnotation() map[string]string {
 	return map[string]string{
 		"kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"Pod\"}}",
@@ -127,7 +127,7 @@ func getCriticalPodAnnotation() map[string]string {
 
 //TODO:@ravisantoshgudimetla This could be made table driven.
 func TestFindDuplicatePods(t *testing.T) {
-	node := buildTestNode("n1", 2000, 3000)
+	node := buildTestNode("n1", 2000, 3000, 10)
 	p1 := buildTestPod("p1", 100, 0, node.Name)
 	p2 := buildTestPod("p2", 100, 0, node.Name)
 	p3 := buildTestPod("p3", 100, 0, node.Name)
diff --git a/pkg/rescheduler/strategies/lownodeutilization.go b/pkg/rescheduler/strategies/lownodeutilization.go
index 3d3b43479..61857164e 100644
--- a/pkg/rescheduler/strategies/lownodeutilization.go
+++ b/pkg/rescheduler/strategies/lownodeutilization.go
@@ -43,50 +43,20 @@ func LowNodeUtilization(client clientset.Interface, strategy api.ReschedulerStra
 	if !strategy.Enabled {
 		return
 	}
-	// todo: move to config validation?
+	// TODO: Maybe create a struct for the strategy as well, so that we don't have to pass along all the params?
+
 	thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
-	if thresholds == nil {
-		fmt.Printf("no resource threshold is configured\n")
+	if !validateThresholds(thresholds) {
 		return
-	} else {
-		found := false
-		for name, _ := range thresholds {
-			if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
-				found = true
-				break
-			}
-		}
-		if !found {
-			fmt.Printf("one of cpu, memory, or pods resource threshold must be configured\n")
-			return
-		}
 	}
-
 	targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
-
-	if targetThresholds == nil {
-		fmt.Printf("no target resource threshold is configured\n")
-		return
-	} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
-		fmt.Printf("no target resource threshold for pods is configured\n")
+	if !validateTargetThresholds(targetThresholds) {
 		return
 	}
 
 	npm := CreateNodePodsMap(client, nodes)
-	lowNodes, targetNodes, otherNodes := []NodeUsageMap{}, []NodeUsageMap{}, []NodeUsageMap{}
-	for node, pods := range npm {
-		usage, bePods, nonRemovablePods, otherPods := NodeUtilization(node, pods)
-		nuMap := NodeUsageMap{node, usage, bePods, nonRemovablePods, otherPods}
-		fmt.Printf("Node %#v usage: %#v\n", node.Name, usage)
-		if IsNodeWithLowUtilization(usage, thresholds) {
-			lowNodes = append(lowNodes, nuMap)
-		} else if IsNodeAboveTargetUtilization(usage, targetThresholds) {
-			targetNodes = append(targetNodes, nuMap)
-		} else {
-			otherNodes = append(otherNodes, nuMap)
-		}
-	}
+	lowNodes, targetNodes, _ := classifyNodes(npm, thresholds, targetThresholds)
 
 	if len(lowNodes) == 0 {
 		fmt.Printf("No node is underutilized\n")
 		return
@@ -101,6 +71,61 @@ func LowNodeUtilization(client clientset.Interface, strategy api.ReschedulerStra
 		fmt.Printf("no node is above target utilization\n")
 		return
 	}
+	evictPodsFromTargetNodes(client, evictionPolicyGroupVersion, targetNodes, lowNodes, targetThresholds)
+}
+
+func validateThresholds(thresholds api.ResourceThresholds) bool {
+	if thresholds == nil {
+		fmt.Printf("no resource threshold is configured\n")
+		return false
+	}
+	found := false
+	for name := range thresholds {
+		if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
+			found = true
+			break
+		}
+	}
+	if !found {
+		fmt.Printf("one of cpu, memory, or pods resource threshold must be configured\n")
+		return false
+	}
+	return true
+}
+
+// This function could be merged into validateThresholds once we are clear.
+func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
+	if targetThresholds == nil {
+		fmt.Printf("no target resource threshold is configured\n")
+		return false
+	} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
+		fmt.Printf("no target resource threshold for pods is configured\n")
+		return false
+	}
+	return true
+}
+
+func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds) ([]NodeUsageMap, []NodeUsageMap, []NodeUsageMap) {
+	lowNodes, targetNodes, otherNodes := []NodeUsageMap{}, []NodeUsageMap{}, []NodeUsageMap{}
+	for node, pods := range npm {
+		usage, bePods, nonRemovablePods, otherPods := NodeUtilization(node, pods)
+		nuMap := NodeUsageMap{node, usage, bePods, nonRemovablePods, otherPods}
+		fmt.Printf("Node %#v usage: %#v\n", node.Name, usage)
+		if IsNodeWithLowUtilization(usage, thresholds) {
+			lowNodes = append(lowNodes, nuMap)
+		} else if IsNodeAboveTargetUtilization(usage, targetThresholds) {
+			targetNodes = append(targetNodes, nuMap)
+		} else {
+			// Seems we don't need to collect them?
+			otherNodes = append(otherNodes, nuMap)
+		}
+	}
+	return lowNodes, targetNodes, otherNodes
+}
+
+func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes []NodeUsageMap, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds) int {
+	podsEvicted := 0
+
 	SortNodesByUsage(targetNodes)
 
 	// total number of pods to be moved
@@ -116,6 +141,7 @@ func LowNodeUtilization(client clientset.Interface, strategy api.ReschedulerStra
 
 	for _, node := range targetNodes {
 		nodePodsUsage := node.usage[v1.ResourcePods]
+
 		nodeCapcity := node.node.Status.Capacity
 		if len(node.node.Status.Allocatable) > 0 {
 			nodeCapcity = node.node.Status.Allocatable
@@ -143,6 +169,7 @@ func LowNodeUtilization(client clientset.Interface, strategy api.ReschedulerStra
 				fmt.Printf("Error when evicting pod: %#v (%#v)\n", pod.Name, err)
 			} else {
 				fmt.Printf("Evicted pod: %#v (%#v)\n", pod.Name, err)
+				podsEvicted++
 				nodePodsUsage = nodePodsUsage - onePodPercentage
 				totalPods--
 				if nodePodsUsage <= targetThresholds[v1.ResourcePods] || totalPods <= 0 {
@@ -153,6 +180,7 @@ func LowNodeUtilization(client clientset.Interface, strategy api.ReschedulerStra
 			}
 		}
 	}
+	return podsEvicted
 }
 
 func SortNodesByUsage(nodes []NodeUsageMap) {
@@ -216,7 +244,6 @@ func NodeUtilization(node *v1.Node, pods []*v1.Pod) (api.ResourceThresholds, []*
 	bePods := []*v1.Pod{}
 	nonRemovablePods := []*v1.Pod{}
 	otherPods := []*v1.Pod{}
-	totalReqs := map[v1.ResourceName]resource.Quantity{}
 
 	for _, pod := range pods {
 		sr, err := podutil.CreatorRef(pod)
diff --git a/pkg/rescheduler/strategies/lownodeutilization_test.go b/pkg/rescheduler/strategies/lownodeutilization_test.go
new file mode 100644
index 000000000..22945e6e0
--- /dev/null
+++ b/pkg/rescheduler/strategies/lownodeutilization_test.go
@@ -0,0 +1,110 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package strategies
+
+import (
+	"fmt"
+	"github.com/aveshagarwal/rescheduler/pkg/api"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/runtime"
+	core "k8s.io/client-go/testing"
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
+	"strings"
+	"testing"
+)
+
+// TODO: Make this table driven.
+func TestLowNodeUtilization(t *testing.T) {
+	var thresholds = make(api.ResourceThresholds)
+	var targetThresholds = make(api.ResourceThresholds)
+	thresholds[v1.ResourceCPU] = 30
+	thresholds[v1.ResourcePods] = 30
+	targetThresholds[v1.ResourceCPU] = 50
+	targetThresholds[v1.ResourcePods] = 50
+
+	n1 := buildTestNode("n1", 4000, 3000, 9)
+	n2 := buildTestNode("n2", 4000, 3000, 10)
+	p1 := buildTestPod("p1", 400, 0, n1.Name)
+	p2 := buildTestPod("p2", 400, 0, n1.Name)
+	p3 := buildTestPod("p3", 400, 0, n1.Name)
+	p4 := buildTestPod("p4", 400, 0, n1.Name)
+	p5 := buildTestPod("p5", 400, 0, n1.Name)
+
+	// These won't be evicted.
+	p6 := buildTestPod("p6", 400, 0, n1.Name)
+	p7 := buildTestPod("p7", 400, 0, n1.Name)
+	p8 := buildTestPod("p8", 400, 0, n1.Name)
+
+	p1.Annotations = getReplicaSetAnnotation()
+	p2.Annotations = getReplicaSetAnnotation()
+	p3.Annotations = getReplicaSetAnnotation()
+	p4.Annotations = getReplicaSetAnnotation()
+	p5.Annotations = getReplicaSetAnnotation()
+	// The following pods won't get evicted.
+	// A daemonset pod.
+	p6.Annotations = getDaemonSetAnnotation()
+	// A pod with local storage.
+	p7.Annotations = getNormalPodAnnotation()
+	p7.Spec.Volumes = []v1.Volume{
+		{
+			Name: "sample",
+			VolumeSource: v1.VolumeSource{
+				HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
+				EmptyDir: &v1.EmptyDirVolumeSource{
+					SizeLimit: *resource.NewQuantity(int64(10), resource.BinarySI)},
+			},
+		},
+	}
+	// A mirror pod (this overwrites p7's normal-pod annotation above).
+	p7.Annotations = getMirrorPodAnnotation()
+	// A critical pod.
+	p8.Namespace = "kube-system"
+	p8.Annotations = getCriticalPodAnnotation()
+	p9 := buildTestPod("p9", 400, 0, n2.Name)
+	p9.Annotations = getReplicaSetAnnotation()
+	fakeClient := &fake.Clientset{}
+	fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		list := action.(core.ListAction)
+		fieldString := list.GetListRestrictions().Fields.String()
+		if strings.Contains(fieldString, "n1") {
+			return true, &v1.PodList{Items: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8}}, nil
+		}
+		if strings.Contains(fieldString, "n2") {
+			return true, &v1.PodList{Items: []v1.Pod{*p9}}, nil
+		}
+		return true, nil, fmt.Errorf("Failed to list: %v", list)
+	})
+	fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
+		getAction := action.(core.GetAction)
+		switch getAction.GetName() {
+		case n1.Name:
+			return true, n1, nil
+		case n2.Name:
+			return true, n2, nil
+		}
+		return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
+	})
+	expectedPodsEvicted := 4
+	npm := CreateNodePodsMap(fakeClient, []*v1.Node{n1, n2})
+	lowNodes, targetNodes, _ := classifyNodes(npm, thresholds, targetThresholds)
+	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds)
+	if expectedPodsEvicted != podsEvicted {
+		t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
+	}
+
+}
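Both TODOs about making these tests table driven could be addressed along the lines of the rough sketch below (not part of the patch). It only reuses helpers introduced or changed here (buildTestNode, CreateNodePodsMap, classifyNodes, evictPodsFromTargetNodes); newFakeClientForNodes is a hypothetical helper that would wrap the list/get reactors and pod fixtures from TestLowNodeUtilization above.

func TestLowNodeUtilizationTableDriven(t *testing.T) {
	tests := []struct {
		name             string
		thresholds       api.ResourceThresholds
		targetThresholds api.ResourceThresholds
		nodes            []*v1.Node
		expectedEvicted  int
	}{
		{
			name:             "evict from overutilized n1 toward underutilized n2",
			thresholds:       api.ResourceThresholds{v1.ResourceCPU: 30, v1.ResourcePods: 30},
			targetThresholds: api.ResourceThresholds{v1.ResourceCPU: 50, v1.ResourcePods: 50},
			nodes:            []*v1.Node{buildTestNode("n1", 4000, 3000, 9), buildTestNode("n2", 4000, 3000, 10)},
			expectedEvicted:  4,
		},
	}
	for _, test := range tests {
		// newFakeClientForNodes is a hypothetical helper wrapping the reactors shown in TestLowNodeUtilization.
		fakeClient := newFakeClientForNodes(test.nodes)
		npm := CreateNodePodsMap(fakeClient, test.nodes)
		lowNodes, targetNodes, _ := classifyNodes(npm, test.thresholds, test.targetThresholds)
		evicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, test.targetThresholds)
		if evicted != test.expectedEvicted {
			t.Errorf("%s: expected %d pods evicted, got %d", test.name, test.expectedEvicted, evicted)
		}
	}
}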