mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-25 20:59:28 +01:00
Merge pull request #751 from HelmutLety/redo_#473
feat: Add DeviationThreshold Paramter for LowNodeUtilization, (Previous attempt - #473 )
This commit is contained in:
@@ -219,6 +219,11 @@ These thresholds, `thresholds` and `targetThresholds`, could be tuned as per you
|
||||
strategy evicts pods from `overutilized nodes` (those with usage above `targetThresholds`) to `underutilized nodes`
|
||||
(those with usage below `thresholds`), it will abort if any number of `underutilized nodes` or `overutilized nodes` is zero.
|
||||
|
||||
Additionally, the strategy accepts a `useDeviationThresholds` parameter.
|
||||
If that parameter is set to `true`, the thresholds are considered as percentage deviations from mean resource usage.
|
||||
`thresholds` will be deducted from the mean among all nodes and `targetThresholds` will be added to the mean.
|
||||
A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization).
|
||||
|
||||
**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
|
||||
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
|
||||
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
|
||||
@@ -232,6 +237,7 @@ actual usage metrics. Implementing metrics-based descheduling is currently TODO
|
||||
|`thresholds`|map(string:int)|
|
||||
|`targetThresholds`|map(string:int)|
|
||||
|`numberOfNodes`|int|
|
||||
|`useDeviationThresholds`|bool|
|
||||
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|
||||
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|
||||
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
|
||||
|
||||
@@ -96,9 +96,10 @@ type Percentage float64
|
||||
type ResourceThresholds map[v1.ResourceName]Percentage
|
||||
|
||||
type NodeResourceUtilizationThresholds struct {
|
||||
Thresholds ResourceThresholds
|
||||
TargetThresholds ResourceThresholds
|
||||
NumberOfNodes int
|
||||
UseDeviationThresholds bool
|
||||
Thresholds ResourceThresholds
|
||||
TargetThresholds ResourceThresholds
|
||||
NumberOfNodes int
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
|
||||
@@ -94,9 +94,10 @@ type Percentage float64
|
||||
type ResourceThresholds map[v1.ResourceName]Percentage
|
||||
|
||||
type NodeResourceUtilizationThresholds struct {
|
||||
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
|
||||
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
|
||||
NumberOfNodes int `json:"numberOfNodes,omitempty"`
|
||||
UseDeviationThresholds bool `json:"useDeviationThresholds,omitempty"`
|
||||
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
|
||||
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
|
||||
NumberOfNodes int `json:"numberOfNodes,omitempty"`
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
|
||||
@@ -261,6 +261,7 @@ func Convert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Name
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(in *NodeResourceUtilizationThresholds, out *api.NodeResourceUtilizationThresholds, s conversion.Scope) error {
|
||||
out.UseDeviationThresholds = in.UseDeviationThresholds
|
||||
out.Thresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
|
||||
out.TargetThresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
|
||||
out.NumberOfNodes = in.NumberOfNodes
|
||||
@@ -273,6 +274,7 @@ func Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtili
|
||||
}
|
||||
|
||||
func autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in *api.NodeResourceUtilizationThresholds, out *NodeResourceUtilizationThresholds, s conversion.Scope) error {
|
||||
out.UseDeviationThresholds = in.UseDeviationThresholds
|
||||
out.Thresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
|
||||
out.TargetThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
|
||||
out.NumberOfNodes = in.NumberOfNodes
|
||||
|
||||
@@ -64,7 +64,7 @@ func HighNodeUtilization(ctx context.Context, client clientset.Interface, strate
|
||||
|
||||
sourceNodes, highNodes := classifyNodes(
|
||||
getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
|
||||
getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
|
||||
getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, false),
|
||||
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
|
||||
return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
|
||||
},
|
||||
|
||||
@@ -50,31 +50,47 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
|
||||
if strategy.Params != nil {
|
||||
nodeFit = strategy.Params.NodeFit
|
||||
}
|
||||
|
||||
useDeviationThresholds := strategy.Params.NodeResourceUtilizationThresholds.UseDeviationThresholds
|
||||
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
|
||||
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
|
||||
if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
|
||||
if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds, useDeviationThresholds); err != nil {
|
||||
klog.ErrorS(err, "LowNodeUtilization config is not valid")
|
||||
return
|
||||
}
|
||||
|
||||
// check if Pods/CPU/Mem are set, if not, set them to 100
|
||||
if _, ok := thresholds[v1.ResourcePods]; !ok {
|
||||
thresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
if useDeviationThresholds {
|
||||
thresholds[v1.ResourcePods] = MinResourcePercentage
|
||||
targetThresholds[v1.ResourcePods] = MinResourcePercentage
|
||||
} else {
|
||||
thresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
}
|
||||
}
|
||||
if _, ok := thresholds[v1.ResourceCPU]; !ok {
|
||||
thresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
if useDeviationThresholds {
|
||||
thresholds[v1.ResourceCPU] = MinResourcePercentage
|
||||
targetThresholds[v1.ResourceCPU] = MinResourcePercentage
|
||||
} else {
|
||||
thresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
}
|
||||
}
|
||||
if _, ok := thresholds[v1.ResourceMemory]; !ok {
|
||||
thresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
if useDeviationThresholds {
|
||||
thresholds[v1.ResourceMemory] = MinResourcePercentage
|
||||
targetThresholds[v1.ResourceMemory] = MinResourcePercentage
|
||||
} else {
|
||||
thresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
}
|
||||
}
|
||||
resourceNames := getResourceNames(thresholds)
|
||||
|
||||
lowNodes, sourceNodes := classifyNodes(
|
||||
getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
|
||||
getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames),
|
||||
getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, useDeviationThresholds),
|
||||
// The node has to be schedulable (to be able to move workload there)
|
||||
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
|
||||
if nodeutil.IsNodeUnschedulable(node) {
|
||||
@@ -166,7 +182,7 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
|
||||
}
|
||||
|
||||
// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
|
||||
func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
|
||||
func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error {
|
||||
// validate thresholds and targetThresholds config
|
||||
if err := validateThresholds(thresholds); err != nil {
|
||||
return fmt.Errorf("thresholds config is not valid: %v", err)
|
||||
@@ -182,7 +198,7 @@ func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.Resou
|
||||
for resourceName, value := range thresholds {
|
||||
if targetValue, ok := targetThresholds[resourceName]; !ok {
|
||||
return fmt.Errorf("thresholds and targetThresholds configured different resources")
|
||||
} else if value > targetValue {
|
||||
} else if value > targetValue && !useDeviationThresholds {
|
||||
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +48,7 @@ func TestLowNodeUtilization(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
useDeviationThresholds bool
|
||||
thresholds, targetThresholds api.ResourceThresholds
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
@@ -643,6 +644,57 @@ func TestLowNodeUtilization(t *testing.T) {
|
||||
},
|
||||
expectedPodsEvicted: 3,
|
||||
},
|
||||
{
|
||||
name: "deviation thresholds",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 5,
|
||||
v1.ResourcePods: 5,
|
||||
},
|
||||
targetThresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 5,
|
||||
v1.ResourcePods: 5,
|
||||
},
|
||||
useDeviationThresholds: true,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A pod with local storage.
|
||||
test.SetNormalOwnerRef(pod)
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 2,
|
||||
evictedPods: []string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
@@ -731,8 +783,9 @@ func TestLowNodeUtilization(t *testing.T) {
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
|
||||
Thresholds: test.thresholds,
|
||||
TargetThresholds: test.targetThresholds,
|
||||
Thresholds: test.thresholds,
|
||||
TargetThresholds: test.targetThresholds,
|
||||
UseDeviationThresholds: test.useDeviationThresholds,
|
||||
},
|
||||
NodeFit: true,
|
||||
},
|
||||
@@ -890,7 +943,7 @@ func TestValidateLowNodeUtilizationStrategyConfig(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range tests {
|
||||
validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
|
||||
validateErr := validateLowUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds, false)
|
||||
|
||||
if validateErr == nil || testCase.errInfo == nil {
|
||||
if validateErr != testCase.errInfo {
|
||||
|
||||
@@ -84,12 +84,30 @@ func validateThresholds(thresholds api.ResourceThresholds) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func normalizePercentage(percent api.Percentage) api.Percentage {
|
||||
if percent > MaxResourcePercentage {
|
||||
return MaxResourcePercentage
|
||||
}
|
||||
if percent < MinResourcePercentage {
|
||||
return MinResourcePercentage
|
||||
}
|
||||
return percent
|
||||
}
|
||||
|
||||
func getNodeThresholds(
|
||||
nodes []*v1.Node,
|
||||
lowThreshold, highThreshold api.ResourceThresholds,
|
||||
resourceNames []v1.ResourceName,
|
||||
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
|
||||
useDeviationThresholds bool,
|
||||
) map[string]NodeThresholds {
|
||||
nodeThresholdsMap := map[string]NodeThresholds{}
|
||||
|
||||
averageResourceUsagePercent := api.ResourceThresholds{}
|
||||
if useDeviationThresholds {
|
||||
averageResourceUsagePercent = averageNodeBasicresources(nodes, getPodsAssignedToNode, resourceNames)
|
||||
}
|
||||
|
||||
for _, node := range nodes {
|
||||
nodeCapacity := node.Status.Capacity
|
||||
if len(node.Status.Allocatable) > 0 {
|
||||
@@ -102,8 +120,19 @@ func getNodeThresholds(
|
||||
}
|
||||
|
||||
for _, resourceName := range resourceNames {
|
||||
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName])
|
||||
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName])
|
||||
if useDeviationThresholds {
|
||||
cap := nodeCapacity[resourceName]
|
||||
if lowThreshold[resourceName] == MinResourcePercentage {
|
||||
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = &cap
|
||||
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = &cap
|
||||
} else {
|
||||
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]-lowThreshold[resourceName]))
|
||||
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, normalizePercentage(averageResourceUsagePercent[resourceName]+highThreshold[resourceName]))
|
||||
}
|
||||
} else {
|
||||
nodeThresholdsMap[node.Name].lowResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, lowThreshold[resourceName])
|
||||
nodeThresholdsMap[node.Name].highResourceThreshold[resourceName] = resourceThreshold(nodeCapacity, resourceName, highThreshold[resourceName])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -430,3 +459,34 @@ func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*
|
||||
|
||||
return nonRemovablePods, removablePods
|
||||
}
|
||||
|
||||
func averageNodeBasicresources(nodes []*v1.Node, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, resourceNames []v1.ResourceName) api.ResourceThresholds {
|
||||
total := api.ResourceThresholds{}
|
||||
average := api.ResourceThresholds{}
|
||||
numberOfNodes := len(nodes)
|
||||
for _, node := range nodes {
|
||||
pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, nil)
|
||||
if err != nil {
|
||||
numberOfNodes--
|
||||
continue
|
||||
}
|
||||
usage := nodeUtilization(node, pods, resourceNames)
|
||||
nodeCapacity := node.Status.Capacity
|
||||
if len(node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.Status.Allocatable
|
||||
}
|
||||
for resource, value := range usage {
|
||||
nodeCapacityValue := nodeCapacity[resource]
|
||||
if resource == v1.ResourceCPU {
|
||||
total[resource] += api.Percentage(value.MilliValue()) / api.Percentage(nodeCapacityValue.MilliValue()) * 100.0
|
||||
} else {
|
||||
total[resource] += api.Percentage(value.Value()) / api.Percentage(nodeCapacityValue.Value()) * 100.0
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
for resource, value := range total {
|
||||
average[resource] = value / api.Percentage(numberOfNodes)
|
||||
}
|
||||
return average
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user