Mirror of https://github.com/kubernetes-sigs/descheduler.git

Merge pull request #285 from lixiang233/Ft_change_lowUtilization_break

Change break condition and thresholds validation for lowUtilization
Authored by Kubernetes Prow Robot on 2020-05-28 07:36:02 -07:00, committed by GitHub
3 changed files with 284 additions and 58 deletions
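
For context, the fields this change validates and defaults live on the strategy params. Below is an illustrative sketch (not part of the patch) of a LowNodeUtilization configuration of that shape, mirroring the struct the updated test builds further down and assuming the repository's api package import path; the percentages are made up.

package main

import (
	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/descheduler/pkg/api"
)

// Illustrative LowNodeUtilization strategy config: thresholds marks nodes as
// under-utilized, targetThresholds marks them as over-utilized.
var strategy = api.DeschedulerStrategy{
	Enabled: true,
	Params: api.StrategyParameters{
		NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
			Thresholds: api.ResourceThresholds{
				v1.ResourceCPU:    20,
				v1.ResourceMemory: 20,
				v1.ResourcePods:   20,
			},
			TargetThresholds: api.ResourceThresholds{
				v1.ResourceCPU:    50,
				v1.ResourceMemory: 50,
				v1.ResourcePods:   50,
			},
		},
	},
}

func main() {}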


@@ -18,6 +18,7 @@ package strategies
import (
"context"
"fmt"
"sort"
v1 "k8s.io/api/core/v1"
@@ -40,6 +41,11 @@ type NodeUsageMap struct {
type NodePodsMap map[*v1.Node][]*v1.Pod
const (
MinResourcePercentage = 0
MaxResourcePercentage = 100
)
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
// todo: move to config validation?
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
@@ -49,12 +55,23 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
if !validateThresholds(thresholds) {
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
klog.Errorf("LowNodeUtilization config is not valid: %v", err)
return
}
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if !validateTargetThresholds(targetThresholds) {
return
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
}
npm := createNodePodsMap(ctx, client, nodes)
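
A minimal sketch (not from the patch, assuming the same api and v1 types) of what the new defaulting does: any resource missing from the policy is pinned to 100 percent in both maps, so it can never flag a node as over-utilized on its own, yet it still participates in the capacity bookkeeping below.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/descheduler/pkg/api"
)

func main() {
	// Only cpu and pods are configured; memory is omitted, as in the new
	// "stop when cpu capacity is depleted" test case further down.
	thresholds := api.ResourceThresholds{v1.ResourceCPU: 30, v1.ResourcePods: 30}
	targetThresholds := api.ResourceThresholds{v1.ResourceCPU: 50, v1.ResourcePods: 50}

	// Same defaulting rule as the block above.
	for _, name := range []v1.ResourceName{v1.ResourcePods, v1.ResourceCPU, v1.ResourceMemory} {
		if _, ok := thresholds[name]; !ok {
			thresholds[name] = 100
			targetThresholds[name] = 100
		}
	}

	fmt.Println(thresholds)       // map[cpu:30 memory:100 pods:30]
	fmt.Println(targetThresholds) // map[cpu:50 memory:100 pods:50]
}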
@@ -99,37 +116,46 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}
func validateThresholds(thresholds api.ResourceThresholds) bool {
if thresholds == nil || len(thresholds) == 0 {
klog.V(1).Infof("no resource threshold is configured")
return false
// validateStrategyConfig checks if the strategy's config is valid
func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
for name := range thresholds {
switch name {
case v1.ResourceCPU:
continue
case v1.ResourceMemory:
continue
case v1.ResourcePods:
continue
default:
klog.Errorf("only cpu, memory, or pods thresholds can be specified")
return false
if err := validateThresholds(targetThresholds); err != nil {
return fmt.Errorf("targetThresholds config is not valid: %v", err)
}
// validate if thresholds and targetThresholds have same resources configured
if len(thresholds) != len(targetThresholds) {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
}
for resourceName, value := range thresholds {
if targetValue, ok := targetThresholds[resourceName]; !ok {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
} else if value > targetValue {
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
}
}
return true
return nil
}
//This function could be merged into above once we are clear.
func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
if targetThresholds == nil {
klog.V(1).Infof("no target resource threshold is configured")
return false
} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
klog.V(1).Infof("no target resource threshold for pods is configured")
return false
// validateThresholds checks if thresholds have valid resource name and resource percentage configured
func validateThresholds(thresholds api.ResourceThresholds) error {
if thresholds == nil || len(thresholds) == 0 {
return fmt.Errorf("no resource threshold is configured")
}
return true
for name, percent := range thresholds {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
if percent < MinResourcePercentage || percent > MaxResourcePercentage {
return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
}
default:
return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
}
}
return nil
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
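
A hypothetical usage sketch of the new validation (a same-package fragment, since the helpers are unexported): the cross-check rejects a config where a threshold exceeds its targetThreshold, matching the error expected by the test cases added below.

// Hypothetical fragment inside package strategies.
thresholds := api.ResourceThresholds{v1.ResourceCPU: 90, v1.ResourceMemory: 20}
targetThresholds := api.ResourceThresholds{v1.ResourceCPU: 80, v1.ResourceMemory: 80}

if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
	// err: "thresholds' cpu percentage is greater than targetThresholds'"
	klog.Errorf("LowNodeUtilization config is not valid: %v", err)
}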
@@ -184,16 +210,12 @@ func evictPodsFromTargetNodes(
totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
// totalCPU capacity to be moved
if _, ok := targetThresholds[v1.ResourceCPU]; ok {
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
}
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
// totalMem capacity to be moved
if _, ok := targetThresholds[v1.ResourceMemory]; ok {
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
}
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
}
klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
@@ -246,7 +268,8 @@ func evictPods(
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
node *v1.Node) {
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && *totalPods > 0 && *totalCPU > 0 && *totalMem > 0 {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
@@ -278,8 +301,8 @@ func evictPods(
nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
klog.V(3).Infof("updated node usage: %#v", nodeUsage)
// check if node utilization drops below target threshold or required capacity (cpu, memory, pods) is moved
if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || (*totalPods <= 0 && *totalCPU <= 0 && *totalMem <= 0) {
// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || *totalPods <= 0 || *totalCPU <= 0 || *totalMem <= 0 {
break
}
}
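
A small sketch of the break-condition change, with hypothetical remaining budgets: the old condition kept evicting while any budget was still positive, while the new one stops as soon as any single budget (pods, cpu, or memory) is used up.

package main

import "fmt"

func main() {
	// Hypothetical remaining capacity to be moved after a few evictions.
	totalPods, totalCPU, totalMem := 1.0, 400.0, 0.0

	// Old condition: keep evicting while ANY budget is left.
	oldKeepEvicting := totalPods > 0 || totalCPU > 0 || totalMem > 0 // true

	// New condition: keep evicting only while ALL budgets are left,
	// i.e. stop once any one of them is depleted.
	newKeepEvicting := totalPods > 0 && totalCPU > 0 && totalMem > 0 // false

	fmt.Println(oldKeepEvicting, newKeepEvicting)
}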


@@ -80,8 +80,11 @@ func TestLowNodeUtilization(t *testing.T) {
thresholds, targetThresholds api.ResourceThresholds
nodes map[string]*v1.Node
pods map[string]*v1.PodList
expectedPodsEvicted int
evictedPods []string
// TODO: divide expectedPodsEvicted into two params like other tests
// expectedPodsEvicted should be the number of pods this test case expects to be evicted,
// but it currently represents both MaxNoOfPodsToEvictPerNode and the test's expected result
expectedPodsEvicted int
evictedPods []string
}{
{
name: "without priorities",
@@ -141,6 +144,65 @@ func TestLowNodeUtilization(t *testing.T) {
},
expectedPodsEvicted: 3,
},
{
name: "without priorities stop when cpu capacity is depleted",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 300, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p2", 400, 300, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p3", 400, 300, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p4", 400, 300, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p5", 400, 300, n1NodeName, setRSOwnerRef),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 300, n1NodeName, setDSOwnerRef),
*test.BuildTestPod("p7", 400, 300, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
setNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 300, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 2100, n1NodeName, setRSOwnerRef),
},
},
n3NodeName: {},
},
// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before cpu is depleted
expectedPodsEvicted: 3,
},
{
name: "with priorities",
thresholds: api.ResourceThresholds{
@@ -346,11 +408,6 @@ func TestLowNodeUtilization(t *testing.T) {
nodes = append(nodes, node)
}
npm := createNodePodsMap(ctx, fakeClient, nodes)
lowNodes, targetNodes := classifyNodes(npm, test.thresholds, test.targetThresholds, false)
if len(lowNodes) != 1 {
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
}
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
@@ -359,7 +416,17 @@ func TestLowNodeUtilization(t *testing.T) {
nodes,
)
evictPodsFromTargetNodes(ctx, targetNodes, lowNodes, test.targetThresholds, false, podEvictor)
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: test.thresholds,
TargetThresholds: test.targetThresholds,
},
},
}
LowNodeUtilization(ctx, fakeClient, strategy, nodes, false, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if test.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
@@ -409,21 +476,120 @@ func TestSortPodsByPriority(t *testing.T) {
}
}
func TestValidateStrategyConfig(t *testing.T) {
tests := []struct {
name string
thresholds api.ResourceThresholds
targetThresholds api.ResourceThresholds
errInfo error
}{
{
name: "passing invalid thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 120,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
},
{
name: "passing invalid targetThresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
"resourceInvalid": 80,
},
errInfo: fmt.Errorf("targetThresholds config is not valid: %v",
fmt.Errorf("only cpu, memory, or pods thresholds can be specified")),
},
{
name: "thresholds and targetThresholds configured different num of resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds and targetThresholds configured different resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds' CPU config value is greater than targetThresholds'",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 90,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
},
{
name: "passing valid strategy config",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: nil,
},
}
for _, testCase := range tests {
validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
}
}
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string
input api.ResourceThresholds
succeed bool
errInfo error
}{
{
name: "passing nil map for threshold",
input: nil,
succeed: false,
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing no threshold",
input: api.ResourceThresholds{},
succeed: false,
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing unsupported resource name",
@@ -431,7 +597,7 @@ func TestValidateThresholds(t *testing.T) {
v1.ResourceCPU: 40,
v1.ResourceStorage: 25.5,
},
succeed: false,
errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
},
{
name: "passing invalid resource name",
@@ -439,7 +605,30 @@ func TestValidateThresholds(t *testing.T) {
v1.ResourceCPU: 40,
"coolResource": 42.0,
},
succeed: false,
errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
},
{
name: "passing invalid resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 110,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("%v threshold not in [%v, %v] range", v1.ResourceCPU, MinResourcePercentage, MaxResourcePercentage),
},
{
name: "passing a valid threshold with max and min resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 100,
v1.ResourceMemory: 0,
},
errInfo: nil,
},
{
name: "passing a valid threshold with only cpu",
input: api.ResourceThresholds{
v1.ResourceCPU: 80,
},
errInfo: nil,
},
{
name: "passing a valid threshold with cpu, memory and pods",
@@ -448,15 +637,19 @@ func TestValidateThresholds(t *testing.T) {
v1.ResourceMemory: 30,
v1.ResourcePods: 40,
},
succeed: true,
errInfo: nil,
},
}
for _, test := range tests {
isValid := validateThresholds(test.input)
validateErr := validateThresholds(test.input)
if isValid != test.succeed {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.succeed, isValid)
if validateErr == nil || test.errInfo == nil {
if validateErr != test.errInfo {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
}
} else if validateErr.Error() != test.errInfo.Error() {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
}
}
}