Mirror of https://github.com/kubernetes-sigs/descheduler.git
Low node utilization to respect priority while evicting pods
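The diff below is from the mirrored commit. As a quick orientation to the ordering it introduces (pods with no priority first, then ascending priority, with ties broken by QoS class: BestEffort, then Burstable, then Guaranteed), here is a minimal standalone sketch of that comparator. The podInfo type, its qos rank field, and all names are illustrative stand-ins, not the descheduler's own types.

package main

import (
    "fmt"
    "sort"
)

// podInfo is an illustrative stand-in for *v1.Pod: priority may be unset (nil),
// and qos ranks the classes BestEffort (0) < Burstable (1) < Guaranteed (2).
type podInfo struct {
    name     string
    priority *int32
    qos      int
}

// sortForEviction orders pods so the preferred eviction candidates come first:
// pods without a priority, then ascending priority, ties broken by QoS class.
// It closely mirrors the comparator added in sortPodsBasedOnPriority below.
func sortForEviction(pods []podInfo) {
    sort.Slice(pods, func(i, j int) bool {
        pi, pj := pods[i].priority, pods[j].priority
        if pi == nil && pj != nil {
            return true
        }
        if pj == nil && pi != nil {
            return false
        }
        if (pi == nil && pj == nil) || *pi == *pj {
            return pods[i].qos < pods[j].qos
        }
        return *pi < *pj
    })
}

func main() {
    low, high := int32(0), int32(10000)
    pods := []podInfo{
        {"guaranteed-high", &high, 2},
        {"burstable-high", &high, 1},
        {"besteffort-low", &low, 0},
        {"no-priority", nil, 1},
    }
    sortForEviction(pods)
    for _, p := range pods {
        fmt.Println(p.name) // no-priority, besteffort-low, burstable-high, guaranteed-high
    }
}
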
@@ -41,6 +41,7 @@ type NodeUsageMap struct {
    bPods []*v1.Pod
    gPods []*v1.Pod
}

type NodePodsMap map[*v1.Node][]*v1.Pod

func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount) {
@@ -59,7 +60,7 @@ func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerS
        return
    }

    npm := CreateNodePodsMap(ds.Client, nodes)
    npm := createNodePodsMap(ds.Client, nodes)
    lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)

    glog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
@@ -151,6 +152,9 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
    return lowNodes, targetNodes
}

// evictPodsFromTargetNodes evicts pods based on priority when all the pods on the node have a priority set;
// if not, it falls back to evicting them based on QoS class.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount nodePodEvictedCount) int {
    podsEvicted := 0

@@ -191,12 +195,27 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
        glog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
        currentPodsEvicted := nodepodCount[node.node]

        // evict best effort pods
        evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
        // evict burstable pods
        evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
        // evict guaranteed pods
        evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
        // Check if one pod has a priority set; if so, assume that all pods on the node do and evict pods based on priority.
        if node.allPods[0].Spec.Priority != nil {
            glog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
            evictablePods := make([]*v1.Pod, 0)
            evictablePods = append(append(node.bPods, node.bePods...), node.gPods...)

            // sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
            sortPodsBasedOnPriority(evictablePods)
            evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
        } else {
            // TODO: Remove this when we support only priority.
            // Falling back to evicting pods based on QoS.
            glog.V(1).Infof("Evicting pods based on QoS")
            glog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
            // evict best effort pods
            evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
            // evict burstable pods
            evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
            // evict guaranteed pods
            evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
        }
        nodepodCount[node.node] = currentPodsEvicted
        podsEvicted = podsEvicted + nodepodCount[node.node]
        glog.V(1).Infof("%v pods evicted from node %#v with usage %v", nodepodCount[node.node], node.node.Name, node.usage)
@@ -269,7 +288,30 @@ func SortNodesByUsage(nodes []NodeUsageMap) {
    })
}

func CreateNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
// sortPodsBasedOnPriority sorts pods based on priority and if their priorities are equal, they are sorted based on QoS tiers.
func sortPodsBasedOnPriority(evictablePods []*v1.Pod) {
    sort.Slice(evictablePods, func(i, j int) bool {
        if evictablePods[i].Spec.Priority == nil && evictablePods[j].Spec.Priority != nil {
            return true
        }
        if evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority != nil {
            return false
        }
        if (evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority == nil) || (*evictablePods[i].Spec.Priority == *evictablePods[j].Spec.Priority) {
            if podutil.IsBestEffortPod(evictablePods[i]) {
                return true
            }
            if podutil.IsBurstablePod(evictablePods[i]) && podutil.IsGuaranteedPod(evictablePods[j]) {
                return true
            }
            return false
        }
        return *evictablePods[i].Spec.Priority < *evictablePods[j].Spec.Priority
    })
}

// createNodePodsMap returns a NodePodsMap with the evictable pods on each node.
func createNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
    npm := NodePodsMap{}
    for _, node := range nodes {
        pods, err := podutil.ListPodsOnANode(client, node)
@@ -308,6 +350,7 @@ func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds
    return true
}

// NodeUtilization returns the current usage of the node.
func NodeUtilization(node *v1.Node, pods []*v1.Pod) (api.ResourceThresholds, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
    bePods := []*v1.Pod{}
    nonRemovablePods := []*v1.Pod{}

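NodeUtilization above reports usage as api.ResourceThresholds, i.e. percentages of the node's capacity derived from the requests of the pods on it. A rough standalone sketch of that percentage-of-capacity idea follows; the nodeCapacity and podRequest types and their field names are illustrative, not the descheduler's own.

package main

import "fmt"

// Illustrative inputs; the real code derives these from the v1.Node capacity and
// the resource requests of the pods returned by ListPodsOnANode.
type nodeCapacity struct {
    milliCPU int64
    memory   int64
    pods     int64
}

type podRequest struct {
    milliCPU int64
    memory   int64
}

// utilizationPercent sums the pod requests and divides by the node capacity,
// mirroring the percentage-based thresholds used to classify nodes.
func utilizationPercent(capacity nodeCapacity, requests []podRequest) (cpu, mem, pods float64) {
    var totalCPU, totalMem int64
    for _, r := range requests {
        totalCPU += r.milliCPU
        totalMem += r.memory
    }
    cpu = float64(totalCPU) / float64(capacity.milliCPU) * 100
    mem = float64(totalMem) / float64(capacity.memory) * 100
    pods = float64(len(requests)) / float64(capacity.pods) * 100
    return cpu, mem, pods
}

func main() {
    capacity := nodeCapacity{milliCPU: 4000, memory: 3000, pods: 10}
    requests := []podRequest{{400, 0}, {400, 0}, {400, 0}}
    cpu, mem, pods := utilizationPercent(capacity, requests)
    // With a test-style node (4000m CPU, 10 pods) and three 400m pods: CPU: 30%, Mem: 0%, Pods: 30%.
    fmt.Printf("CPU: %.0f%%, Mem: %.0f%%, Pods: %.0f%%\n", cpu, mem, pods)
}
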
@@ -28,10 +28,11 @@ import (
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/client-go/kubernetes/fake"
    core "k8s.io/client-go/testing"
    "reflect"
)

// TODO: Make this table driven.
func TestLowNodeUtilization(t *testing.T) {
func TestLowNodeUtilizationWithoutPriority(t *testing.T) {
    var thresholds = make(api.ResourceThresholds)
    var targetThresholds = make(api.ResourceThresholds)
    thresholds[v1.ResourceCPU] = 30
@@ -110,7 +111,7 @@ func TestLowNodeUtilization(t *testing.T) {
        return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
    })
    expectedPodsEvicted := 3
    npm := CreateNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
    npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
    lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
    if len(lowNodes) != 1 {
        t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
@@ -126,6 +127,154 @@ func TestLowNodeUtilization(t *testing.T) {

}

// TODO: Make this table driven.
func TestLowNodeUtilizationWithPriorities(t *testing.T) {
    var thresholds = make(api.ResourceThresholds)
    var targetThresholds = make(api.ResourceThresholds)
    thresholds[v1.ResourceCPU] = 30
    thresholds[v1.ResourcePods] = 30
    targetThresholds[v1.ResourceCPU] = 50
    targetThresholds[v1.ResourcePods] = 50
    lowPriority := int32(0)
    highPriority := int32(10000)
    n1 := test.BuildTestNode("n1", 4000, 3000, 9)
    n2 := test.BuildTestNode("n2", 4000, 3000, 10)
    n3 := test.BuildTestNode("n3", 4000, 3000, 10)
    // Making node n3 unschedulable so that it won't be counted in the lowUtilized nodes list.
    n3.Spec.Unschedulable = true
    p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
    p1.Spec.Priority = &highPriority
    p2 := test.BuildTestPod("p2", 400, 0, n1.Name)
    p2.Spec.Priority = &highPriority
    p3 := test.BuildTestPod("p3", 400, 0, n1.Name)
    p3.Spec.Priority = &highPriority
    p4 := test.BuildTestPod("p4", 400, 0, n1.Name)
    p4.Spec.Priority = &highPriority
    p5 := test.BuildTestPod("p5", 400, 0, n1.Name)
    p5.Spec.Priority = &lowPriority

    // These won't be evicted.
    p6 := test.BuildTestPod("p6", 400, 0, n1.Name)
    p6.Spec.Priority = &highPriority
    p7 := test.BuildTestPod("p7", 400, 0, n1.Name)
    p7.Spec.Priority = &lowPriority
    p8 := test.BuildTestPod("p8", 400, 0, n1.Name)
    p8.Spec.Priority = &lowPriority

    p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    p2.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    p3.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    p4.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    p5.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    // The following 3 pods won't get evicted.
    // A daemonset.
    p6.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
    // A pod with local storage.
    p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
    p7.Spec.Volumes = []v1.Volume{
        {
            Name: "sample",
            VolumeSource: v1.VolumeSource{
                HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
                EmptyDir: &v1.EmptyDirVolumeSource{
                    SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
            },
        },
    }
    // A Mirror Pod.
    p7.Annotations = test.GetMirrorPodAnnotation()
    // A Critical Pod.
    p8.Namespace = "kube-system"
    p8.Annotations = test.GetCriticalPodAnnotation()
    p9 := test.BuildTestPod("p9", 400, 0, n1.Name)
    p9.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
    fakeClient := &fake.Clientset{}
    fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
        list := action.(core.ListAction)
        fieldString := list.GetListRestrictions().Fields.String()
        if strings.Contains(fieldString, "n1") {
            return true, &v1.PodList{Items: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8}}, nil
        }
        if strings.Contains(fieldString, "n2") {
            return true, &v1.PodList{Items: []v1.Pod{*p9}}, nil
        }
        if strings.Contains(fieldString, "n3") {
            return true, &v1.PodList{Items: []v1.Pod{}}, nil
        }
        return true, nil, fmt.Errorf("Failed to list: %v", list)
    })
    fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
        getAction := action.(core.GetAction)
        switch getAction.GetName() {
        case n1.Name:
            return true, n1, nil
        case n2.Name:
            return true, n2, nil
        case n3.Name:
            return true, n3, nil
        }
        return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
    })
    expectedPodsEvicted := 3
    npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
    lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
    if len(lowNodes) != 1 {
        t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
    }
    npe := nodePodEvictedCount{}
    npe[n1] = 0
    npe[n2] = 0
    npe[n3] = 0
    podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, npe)
    if expectedPodsEvicted != podsEvicted {
        t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
    }

}

func TestSortPodsByPriority(t *testing.T) {
    n1 := test.BuildTestNode("n1", 4000, 3000, 9)
    lowPriority := int32(0)
    highPriority := int32(10000)
    p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
    p1.Spec.Priority = &lowPriority

    // BestEffort
    p2 := test.BuildTestPod("p2", 400, 0, n1.Name)
    p2.Spec.Priority = &highPriority

    p2.Spec.Containers[0].Resources.Requests = nil
    p2.Spec.Containers[0].Resources.Limits = nil

    // Burstable
    p3 := test.BuildTestPod("p3", 400, 0, n1.Name)
    p3.Spec.Priority = &highPriority

    // Guaranteed
    p4 := test.BuildTestPod("p4", 400, 100, n1.Name)
    p4.Spec.Priority = &highPriority
    p4.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = *resource.NewMilliQuantity(400, resource.DecimalSI)
    p4.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = *resource.NewQuantity(100, resource.DecimalSI)

    // Pods with nil priority.
    p5 := test.BuildTestPod("p5", 400, 100, n1.Name)
    p5.Spec.Priority = nil
    p6 := test.BuildTestPod("p6", 400, 100, n1.Name)
    p6.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = *resource.NewMilliQuantity(400, resource.DecimalSI)
    p6.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = *resource.NewQuantity(100, resource.DecimalSI)
    p6.Spec.Priority = nil

    podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}

    sortPodsBasedOnPriority(podList)
    for _, pod := range podList {
        fmt.Println(pod)
    }
    if !reflect.DeepEqual(podList[len(podList)-1], p4) {
        t.Errorf("Expected the last pod in the sorted list to be %v, which is of highest priority and guaranteed, but got %v", p4, podList[len(podList)-1])
    }
}

func TestValidateThresholds(t *testing.T) {
    tests := []struct {
        name string

@@ -37,6 +37,7 @@ func BuildTestPod(name string, cpu int64, memory int64, nodeName string) *v1.Pod
            {
                Resources: v1.ResourceRequirements{
                    Requests: v1.ResourceList{},
                    Limits:   v1.ResourceList{},
                },
            },
        },
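Both test functions above carry a "// TODO: Make this table driven." note. Below is a hedged sketch of what a table-driven check of the new comparator could look like, assuming it sits in the same package and file as the tests above (so it reuses their imports and the test.BuildTestPod helper shown in this diff); the test name, case names, and expectations are illustrative.

func TestSortPodsBasedOnPriorityTableDriven(t *testing.T) {
    lowPriority := int32(0)
    highPriority := int32(10000)
    makePod := func(name string, priority *int32) *v1.Pod {
        // Non-zero CPU and memory requests, so both pods in a case share a QoS class
        // and only the priority comparison decides the order.
        p := test.BuildTestPod(name, 400, 100, "n1")
        p.Spec.Priority = priority
        return p
    }
    tests := []struct {
        name      string
        pods      []*v1.Pod
        wantFirst string
    }{
        {
            name:      "nil priority sorts ahead of set priority",
            pods:      []*v1.Pod{makePod("with-priority", &highPriority), makePod("no-priority", nil)},
            wantFirst: "no-priority",
        },
        {
            name:      "lower priority sorts ahead of higher priority",
            pods:      []*v1.Pod{makePod("high", &highPriority), makePod("low", &lowPriority)},
            wantFirst: "low",
        },
    }
    for _, tc := range tests {
        sortPodsBasedOnPriority(tc.pods)
        if tc.pods[0].Name != tc.wantFirst {
            t.Errorf("%s: expected %s to sort first, got %s", tc.name, tc.wantFirst, tc.pods[0].Name)
        }
    }
}

Each case only asserts which pod sorts first; extending the expectations to the full order would follow the same pattern.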