
Merge pull request #105 from ravisantoshgudimetla/priority-low-node

Low node utilization to respect priority while evicting pods
This commit is contained in:
RaviSantosh Gudimetla
2018-08-21 14:33:50 -04:00
committed by GitHub
3 changed files with 203 additions and 10 deletions
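In short, the strategy now checks whether pods carry a priority: if they do, all evictable pods are sorted by priority (with QoS as a tie-breaker) and evicted in that order; otherwise it falls back to the old best-effort, burstable, guaranteed order. Below is a minimal, standalone Go sketch of that ordering, using simplified stand-in types rather than the descheduler's actual *v1.Pod and helper functions:

package main

import (
	"fmt"
	"sort"
)

// QoS tiers, lowest (evicted first) to highest.
const (
	bestEffort = iota
	burstable
	guaranteed
)

// pod is a simplified stand-in for *v1.Pod: an optional priority plus a QoS tier.
type pod struct {
	name     string
	priority *int32 // nil means no priority set
	qos      int
}

// sortForEviction orders pods so the best eviction candidates come first:
// lower priority first (nil sorts lowest), and for equal priority, lower QoS tier first.
func sortForEviction(pods []pod) {
	sort.Slice(pods, func(i, j int) bool {
		pi, pj := pods[i].priority, pods[j].priority
		if pi == nil && pj != nil {
			return true
		}
		if pj == nil && pi != nil {
			return false
		}
		if (pi == nil && pj == nil) || *pi == *pj {
			return pods[i].qos < pods[j].qos
		}
		return *pi < *pj
	})
}

func main() {
	low, high := int32(0), int32(10000)
	pods := []pod{
		{"guaranteed-high", &high, guaranteed},
		{"burstable-high", &high, burstable},
		{"besteffort-high", &high, bestEffort},
		{"burstable-low", &low, burstable},
		{"no-priority", nil, guaranteed},
	}
	sortForEviction(pods)
	for _, p := range pods {
		// Prints: no-priority, burstable-low, besteffort-high, burstable-high, guaranteed-high
		fmt.Println(p.name)
	}
}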


@@ -41,6 +41,7 @@ type NodeUsageMap struct {
bPods []*v1.Pod
gPods []*v1.Pod
}
type NodePodsMap map[*v1.Node][]*v1.Pod
func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount) {
@@ -59,7 +60,7 @@ func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerS
return
}
npm := CreateNodePodsMap(ds.Client, nodes)
npm := createNodePodsMap(ds.Client, nodes)
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
glog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
@@ -151,6 +152,9 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
return lowNodes, targetNodes
}
// evictPodsFromTargetNodes evicts pods based on priority if all the pods on the node have a priority set;
// if not, it falls back to evicting them based on QoS.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount nodePodEvictedCount) int {
podsEvicted := 0
@@ -191,12 +195,27 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
glog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
currentPodsEvicted := nodepodCount[node.node]
// evict best effort pods
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict burstable pods
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict guaranteed pods
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// Check if the first pod has a priority; if so, assume all pods have a priority and evict based on priority.
if node.allPods[0].Spec.Priority != nil {
glog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
evictablePods := make([]*v1.Pod, 0)
evictablePods = append(append(node.bPods, node.bePods...), node.gPods...)
// Sort the evictable pods based on priority; pods with the same priority are sorted based on QoS tiers.
sortPodsBasedOnPriority(evictablePods)
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on QoS.
glog.V(1).Infof("Evicting pods based on QoS")
glog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
// evict best effort pods
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict burstable pods
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict guaranteed pods
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
}
nodepodCount[node.node] = currentPodsEvicted
podsEvicted = podsEvicted + nodepodCount[node.node]
glog.V(1).Infof("%v pods evicted from node %#v with usage %v", nodepodCount[node.node], node.node.Name, node.usage)
@@ -269,7 +288,30 @@ func SortNodesByUsage(nodes []NodeUsageMap) {
})
}
func CreateNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
// sortPodsBasedOnPriority sorts pods based on priority; if their priorities are equal, they are sorted based on QoS tiers.
func sortPodsBasedOnPriority(evictablePods []*v1.Pod) {
sort.Slice(evictablePods, func(i, j int) bool {
if evictablePods[i].Spec.Priority == nil && evictablePods[j].Spec.Priority != nil {
return true
}
if evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority != nil {
return false
}
if (evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority == nil) || (*evictablePods[i].Spec.Priority == *evictablePods[j].Spec.Priority) {
if podutil.IsBestEffortPod(evictablePods[i]) {
return true
}
if podutil.IsBurstablePod(evictablePods[i]) && podutil.IsGuaranteedPod(evictablePods[j]) {
return true
}
return false
}
return *evictablePods[i].Spec.Priority < *evictablePods[j].Spec.Priority
})
}
// createNodePodsMap returns a NodePodsMap with the evictable pods on each node.
func createNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
npm := NodePodsMap{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(client, node)
@@ -308,6 +350,7 @@ func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds
return true
}
// NodeUtilization returns the current usage of the node.
func NodeUtilization(node *v1.Node, pods []*v1.Pod) (api.ResourceThresholds, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
bePods := []*v1.Pod{}
nonRemovablePods := []*v1.Pod{}


@@ -28,10 +28,11 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"reflect"
)
// TODO: Make this table driven.
func TestLowNodeUtilization(t *testing.T) {
func TestLowNodeUtilizationWithoutPriority(t *testing.T) {
var thresholds = make(api.ResourceThresholds)
var targetThresholds = make(api.ResourceThresholds)
thresholds[v1.ResourceCPU] = 30
@@ -110,7 +111,7 @@ func TestLowNodeUtilization(t *testing.T) {
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
expectedPodsEvicted := 3
npm := CreateNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
if len(lowNodes) != 1 {
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
@@ -126,6 +127,154 @@ func TestLowNodeUtilization(t *testing.T) {
}
// TODO: Make this table driven.
func TestLowNodeUtilizationWithPriorities(t *testing.T) {
var thresholds = make(api.ResourceThresholds)
var targetThresholds = make(api.ResourceThresholds)
thresholds[v1.ResourceCPU] = 30
thresholds[v1.ResourcePods] = 30
targetThresholds[v1.ResourceCPU] = 50
targetThresholds[v1.ResourcePods] = 50
lowPriority := int32(0)
highPriority := int32(10000)
n1 := test.BuildTestNode("n1", 4000, 3000, 9)
n2 := test.BuildTestNode("n2", 4000, 3000, 10)
n3 := test.BuildTestNode("n3", 4000, 3000, 10)
// Making node n3 unschedulable so that it won't be counted in the lowUtilized nodes list.
n3.Spec.Unschedulable = true
p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
p1.Spec.Priority = &highPriority
p2 := test.BuildTestPod("p2", 400, 0, n1.Name)
p2.Spec.Priority = &highPriority
p3 := test.BuildTestPod("p3", 400, 0, n1.Name)
p3.Spec.Priority = &highPriority
p4 := test.BuildTestPod("p4", 400, 0, n1.Name)
p4.Spec.Priority = &highPriority
p5 := test.BuildTestPod("p5", 400, 0, n1.Name)
p5.Spec.Priority = &lowPriority
// These won't be evicted.
p6 := test.BuildTestPod("p6", 400, 0, n1.Name)
p6.Spec.Priority = &highPriority
p7 := test.BuildTestPod("p7", 400, 0, n1.Name)
p7.Spec.Priority = &lowPriority
p8 := test.BuildTestPod("p8", 400, 0, n1.Name)
p8.Spec.Priority = &lowPriority
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
p2.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
p3.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
p4.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
p5.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following pods won't get evicted.
// A daemonset.
p6.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A pod with local storage.
p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p7.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
p7.Annotations = test.GetMirrorPodAnnotation()
// A Critical Pod.
p8.Namespace = "kube-system"
p8.Annotations = test.GetCriticalPodAnnotation()
p9 := test.BuildTestPod("p9", 400, 0, n1.Name)
p9.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, "n1") {
return true, &v1.PodList{Items: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8}}, nil
}
if strings.Contains(fieldString, "n2") {
return true, &v1.PodList{Items: []v1.Pod{*p9}}, nil
}
if strings.Contains(fieldString, "n3") {
return true, &v1.PodList{Items: []v1.Pod{}}, nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.GetAction)
switch getAction.GetName() {
case n1.Name:
return true, n1, nil
case n2.Name:
return true, n2, nil
case n3.Name:
return true, n3, nil
}
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
expectedPodsEvicted := 3
npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
if len(lowNodes) != 1 {
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
}
npe := nodePodEvictedCount{}
npe[n1] = 0
npe[n2] = 0
npe[n3] = 0
podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, npe)
if expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
}
}
func TestSortPodsByPriority(t *testing.T) {
n1 := test.BuildTestNode("n1", 4000, 3000, 9)
lowPriority := int32(0)
highPriority := int32(10000)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
p1.Spec.Priority = &lowPriority
// BestEffort
p2 := test.BuildTestPod("p2", 400, 0, n1.Name)
p2.Spec.Priority = &highPriority
p2.Spec.Containers[0].Resources.Requests = nil
p2.Spec.Containers[0].Resources.Limits = nil
// Burstable
p3 := test.BuildTestPod("p3", 400, 0, n1.Name)
p3.Spec.Priority = &highPriority
// Guaranteed
p4 := test.BuildTestPod("p4", 400, 100, n1.Name)
p4.Spec.Priority = &highPriority
p4.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = *resource.NewMilliQuantity(400, resource.DecimalSI)
p4.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = *resource.NewQuantity(100, resource.DecimalSI)
// Pods with nil priorities.
p5 := test.BuildTestPod("p5", 400, 100, n1.Name)
p5.Spec.Priority = nil
p6 := test.BuildTestPod("p6", 400, 100, n1.Name)
p6.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = *resource.NewMilliQuantity(400, resource.DecimalSI)
p6.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = *resource.NewQuantity(100, resource.DecimalSI)
p6.Spec.Priority = nil
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
sortPodsBasedOnPriority(podList)
for _, pod := range podList {
fmt.Println(pod)
}
if !reflect.DeepEqual(podList[len(podList)-1], p4) {
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
}
}
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string


@@ -37,6 +37,7 @@ func BuildTestPod(name string, cpu int64, memory int64, nodeName string) *v1.Pod
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{},
Limits: v1.ResourceList{},
},
},
},