1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #364 from lixiang233/ft_allow_custom_priority_threshold

Allow custom priority threshold
This commit is contained in:
Kubernetes Prow Robot
2020-08-12 07:35:45 -07:00
committed by GitHub
20 changed files with 420 additions and 57 deletions

View File

@@ -234,7 +234,9 @@ strategies:
maxPodLifeTimeSeconds: 86400
````
## Namespace filtering
## Filter Pods
### Namespace filtering
Strategies like `PodLifeTime`, `RemovePodsHavingTooManyRestarts`, `RemovePodsViolatingNodeTaints`,
`RemovePodsViolatingNodeAffinity` and `RemovePodsViolatingInterPodAntiAffinity` can specify `namespaces`
@@ -276,6 +278,41 @@ The strategy gets executed over all namespaces but `namespace1` and `namespace2`
It's not allowed to compute `include` with `exclude` field.
### Priority filtering
All strategies are able to configure a priority threshold, only pods under the threshold can be evicted. You can
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.
E.g.
Setting `thresholdPriority`
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 86400
thresholdPriority: 10000
```
Setting `thresholdPriorityClassName`
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 86400
thresholdPriorityClassName: "priorityclass1"
```
Note that you can't configure both `thresholdPriority` and `thresholdPriorityClassName`, if the given priority class
does not exist, descheduler won't create it and will throw an error.
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs the following general mechanism:

View File

@@ -18,4 +18,7 @@ rules:
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
{{- end -}}

View File

@@ -17,6 +17,9 @@ rules:
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
---
apiVersion: v1
kind: ServiceAccount

View File

@@ -52,8 +52,8 @@ type Namespaces struct {
}
// Besides Namespaces only one of its members may be specified
// TODO(jchaloup): move Namespaces to individual strategies once the policy
// version is bumped to v1alpha2
// TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies
// once the policy version is bumped to v1alpha2
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds
NodeAffinityType []string
@@ -61,6 +61,8 @@ type StrategyParameters struct {
MaxPodLifeTimeSeconds *uint
RemoveDuplicates *RemoveDuplicates
Namespaces Namespaces
ThresholdPriority *int32
ThresholdPriorityClassName string
}
type Percentage float64

View File

@@ -51,7 +51,7 @@ type Namespaces struct {
Exclude []string `json:"exclude"`
}
// Besides Namespaces only one of its members may be specified
// Besides Namespaces ThresholdPriority and ThresholdPriorityClassName only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
@@ -59,6 +59,8 @@ type StrategyParameters struct {
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
Namespaces Namespaces `json:"namespaces"`
ThresholdPriority *int32 `json:"thresholdPriority"`
ThresholdPriorityClassName string `json:"thresholdPriorityClassName"`
}
type Percentage float64

View File

@@ -249,6 +249,8 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
if err := Convert_v1alpha1_Namespaces_To_api_Namespaces(&in.Namespaces, &out.Namespaces, s); err != nil {
return err
}
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
return nil
}
@@ -266,6 +268,8 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
if err := Convert_api_Namespaces_To_v1alpha1_Namespaces(&in.Namespaces, &out.Namespaces, s); err != nil {
return err
}
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
return nil
}

View File

@@ -243,6 +243,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
(*in).DeepCopyInto(*out)
}
in.Namespaces.DeepCopyInto(&out.Namespaces)
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
return
}

View File

@@ -243,6 +243,11 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
(*in).DeepCopyInto(*out)
}
in.Namespaces.DeepCopyInto(&out.Namespaces)
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
return
}

View File

@@ -78,12 +78,16 @@ func NewPodEvictor(
}
// IsEvictable checks if a pod is evictable or not.
func (pe *PodEvictor) IsEvictable(pod *v1.Pod) bool {
func (pe *PodEvictor) IsEvictable(pod *v1.Pod, thresholdPriority int32) bool {
checkErrs := []error{}
if IsCriticalPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
}
if !IsPodEvictableBasedOnPriority(pod, thresholdPriority) {
checkErrs = append(checkErrs, fmt.Errorf("pod is not evictable due to its priority"))
}
ownerRefList := podutil.OwnerRef(pod)
if IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
@@ -216,3 +220,8 @@ func IsPodWithLocalStorage(pod *v1.Pod) bool {
return false
}
// IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
return pod.Spec.Priority == nil || (*pod.Spec.Priority < priority && *pod.Spec.Priority < utils.SystemCriticalPriority)
}

View File

@@ -71,10 +71,13 @@ func TestEvictPod(t *testing.T) {
func TestIsEvictable(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
lowPriority := int32(800)
highPriority := int32(900)
type testCase struct {
pod *v1.Pod
runBefore func(*v1.Pod)
evictLocalStoragePods bool
priorityThreshold *int32
result bool
}
@@ -179,6 +182,7 @@ func TestIsEvictable(t *testing.T) {
}, {
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
},
evictLocalStoragePods: false,
@@ -186,6 +190,7 @@ func TestIsEvictable(t *testing.T) {
}, {
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
},
@@ -194,6 +199,7 @@ func TestIsEvictable(t *testing.T) {
}, {
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
@@ -202,6 +208,7 @@ func TestIsEvictable(t *testing.T) {
}, {
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
@@ -210,6 +217,25 @@ func TestIsEvictable(t *testing.T) {
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
priorityThreshold: &lowPriority,
result: false,
}, {
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
priorityThreshold: &lowPriority,
result: true,
},
}
@@ -218,7 +244,11 @@ func TestIsEvictable(t *testing.T) {
podEvictor := &PodEvictor{
evictLocalStoragePods: test.evictLocalStoragePods,
}
result := podEvictor.IsEvictable(test.pod)
testPriorityThreshold := utils.SystemCriticalPriority
if test.priorityThreshold != nil {
testPriorityThreshold = *test.priorityThreshold
}
result := podEvictor.IsEvictable(test.pod, testPriorityThreshold)
if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
}
@@ -233,10 +263,6 @@ func TestPodTypes(t *testing.T) {
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil)
p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil)
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following 4 pods won't get evicted.
@@ -257,18 +283,9 @@ func TestPodTypes(t *testing.T) {
}
// A Mirror Pod.
p4.Annotations = test.GetMirrorPodAnnotation()
// A Critical Pod.
p5.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p5.Spec.Priority = &priority
systemCriticalPriority := utils.SystemCriticalPriority
p5.Spec.Priority = &systemCriticalPriority
if !IsMirrorPod(p4) {
t.Errorf("Expected p4 to be a mirror pod.")
}
if !IsCriticalPod(p5) {
t.Errorf("Expected p5 to be a critical pod.")
}
if !IsPodWithLocalStorage(p3) {
t.Errorf("Expected p3 to be a pod with local storage.")
}

View File

@@ -18,6 +18,7 @@ package strategies
import (
"context"
"fmt"
"reflect"
"sort"
"strings"
@@ -31,8 +32,20 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
// A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
// namespace, and have at least one container with the same image.
@@ -44,9 +57,21 @@ func RemoveDuplicatePods(
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
) {
if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil {
klog.V(1).Info(err)
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(podEvictor.IsEvictable))
pods, err := podutil.ListPodsOnANode(ctx, client, node, podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, thresholdPriority)
}))
if err != nil {
klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
continue

View File

@@ -50,13 +50,28 @@ const (
MaxResourcePercentage = 100
)
func validateLowNodeUtilizationParams(params *api.StrategyParameters) error {
if params == nil || params.NodeResourceUtilizationThresholds == nil {
return fmt.Errorf("NodeResourceUtilizationThresholds not set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
// todo: move to config validation?
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if strategy.Params == nil || strategy.Params.NodeResourceUtilizationThresholds == nil {
klog.V(1).Infof("NodeResourceUtilizationThresholds not set")
if err := validateLowNodeUtilizationParams(strategy.Params); err != nil {
klog.V(1).Info(err)
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
@@ -116,7 +131,8 @@ func LowNodeUtilization(ctx context.Context, client clientset.Interface, strateg
targetNodes,
lowNodes,
targetThresholds,
podEvictor)
podEvictor,
thresholdPriority)
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}
@@ -196,6 +212,7 @@ func evictPodsFromTargetNodes(
targetNodes, lowNodes []NodeUsageMap,
targetThresholds api.ResourceThresholds,
podEvictor *evictions.PodEvictor,
thresholdPriority int32,
) {
sortNodesByUsage(targetNodes)
@@ -232,7 +249,7 @@ func evictPodsFromTargetNodes(
}
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor)
nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor, thresholdPriority)
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, removablePods:%v", len(node.allPods), len(nonRemovablePods), len(removablePods))
if len(removablePods) == 0 {
@@ -393,11 +410,11 @@ func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds {
}
}
func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor) ([]*v1.Pod, []*v1.Pod) {
func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor, thresholdPriority int32) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods {
if !evictor.IsEvictable(pod) {
if !evictor.IsEvictable(pod, thresholdPriority) {
nonRemovablePods = append(nonRemovablePods, pod)
} else {
removablePods = append(removablePods, pod)

View File

@@ -28,6 +28,7 @@ import (
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) error {
@@ -38,6 +39,9 @@ func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) err
if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
@@ -48,6 +52,12 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
klog.V(1).Info(err)
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, nodeAffinity := range strategy.Params.NodeAffinityType {
klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
@@ -61,7 +71,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
client,
node,
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod) &&
return podEvictor.IsEvictable(pod, thresholdPriority) &&
!nodeutil.PodFitsCurrentNode(pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes)
}),

View File

@@ -39,6 +39,9 @@ func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters)
if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
@@ -53,6 +56,11 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
if strategy.Params != nil {
namespaces = strategy.Params.Namespaces
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
@@ -60,7 +68,9 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
ctx,
client,
node,
podutil.WithFilter(podEvictor.IsEvictable),
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, thresholdPriority)
}),
podutil.WithNamespaces(namespaces.Include),
podutil.WithoutNamespaces(namespaces.Exclude),
)

View File

@@ -40,6 +40,9 @@ func validateRemovePodsViolatingInterPodAntiAffinityParams(params *api.StrategyP
if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
@@ -54,6 +57,11 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
if strategy.Params != nil {
namespaces = strategy.Params.Namespaces
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
@@ -61,7 +69,9 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
ctx,
client,
node,
podutil.WithFilter(podEvictor.IsEvictable),
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, thresholdPriority)
}),
podutil.WithNamespaces(namespaces.Include),
podutil.WithoutNamespaces(namespaces.Exclude),
)

View File

@@ -28,6 +28,7 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validatePodLifeTimeParams(params *api.StrategyParameters) error {
@@ -39,6 +40,9 @@ func validatePodLifeTimeParams(params *api.StrategyParameters) error {
if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
@@ -49,11 +53,16 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
klog.V(1).Info(err)
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
pods := listOldPodsOnNode(ctx, client, node, strategy.Params, podEvictor)
pods := listOldPodsOnNode(ctx, client, node, strategy.Params, podEvictor, thresholdPriority)
for _, pod := range pods {
success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime")
if success {
@@ -69,12 +78,14 @@ func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.D
}
}
func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, params *api.StrategyParameters, evictor *evictions.PodEvictor) []*v1.Pod {
func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, params *api.StrategyParameters, podEvictor *evictions.PodEvictor, thresholdPriority int32) []*v1.Pod {
pods, err := podutil.ListPodsOnANode(
ctx,
client,
node,
podutil.WithFilter(evictor.IsEvictable),
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, thresholdPriority)
}),
podutil.WithNamespaces(params.Namespaces.Include),
podutil.WithoutNamespaces(params.Namespaces.Exclude),
)

View File

@@ -27,6 +27,7 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validateRemovePodsHavingTooManyRestartsParams(params *api.StrategyParameters) error {
@@ -38,6 +39,9 @@ func validateRemovePodsHavingTooManyRestartsParams(params *api.StrategyParameter
if len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
@@ -50,13 +54,21 @@ func RemovePodsHavingTooManyRestarts(ctx context.Context, client clientset.Inter
klog.V(1).Info(err)
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.V(1).Infof("failed to get threshold priority from strategy's params: %#v", err)
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %s", node.Name)
pods, err := podutil.ListPodsOnANode(
ctx,
client,
node,
podutil.WithFilter(podEvictor.IsEvictable),
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, thresholdPriority)
}),
podutil.WithNamespaces(strategy.Params.Namespaces.Include),
podutil.WithoutNamespaces(strategy.Params.Namespaces.Exclude),
)

View File

@@ -104,7 +104,7 @@ func GetPodSource(pod *v1.Pod) (string, error) {
return "", fmt.Errorf("cannot get source of pod %q", pod.UID)
}
// IsCriticalPod returns true if pod's priority is greater than or equal to SystemCriticalPriority.
// IsCriticalPod returns true if the pod is a static or mirror pod.
func IsCriticalPod(pod *v1.Pod) bool {
if IsStaticPod(pod) {
return true
@@ -112,17 +112,9 @@ func IsCriticalPod(pod *v1.Pod) bool {
if IsMirrorPod(pod) {
return true
}
if pod.Spec.Priority != nil && IsCriticalPodBasedOnPriority(*pod.Spec.Priority) {
return true
}
return false
}
// IsCriticalPodBasedOnPriority checks if the given pod is a critical pod based on priority resolved from pod Spec.
func IsCriticalPodBasedOnPriority(priority int32) bool {
return priority >= SystemCriticalPriority
}
// PodRequestsAndLimits returns a dictionary of all defined resources summed up for all
// containers of the pod. If PodOverhead feature is enabled, pod overhead is added to the
// total container resource requests and to the total container limits which have a

View File

@@ -1,9 +1,15 @@
package utils
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/api"
)
const SystemCriticalPriority = 2 * int32(1000000000)
@@ -33,3 +39,36 @@ func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.String, se
}
return true
}
// GetPriorityFromPriorityClass gets priority from the given priority class.
// If no priority class is provided, it will return SystemCriticalPriority by default.
func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) {
if name != "" {
priorityClass, err := client.SchedulingV1().PriorityClasses().Get(ctx, name, metav1.GetOptions{})
if err != nil {
return 0, err
}
return priorityClass.Value, nil
}
return SystemCriticalPriority, nil
}
// GetPriorityFromStrategyParams gets priority from the given StrategyParameters.
// It will return SystemCriticalPriority by default.
func GetPriorityFromStrategyParams(ctx context.Context, client clientset.Interface, params *api.StrategyParameters) (priority int32, err error) {
if params == nil {
return SystemCriticalPriority, nil
}
if params.ThresholdPriority != nil {
priority = *params.ThresholdPriority
} else {
priority, err = GetPriorityFromPriorityClass(ctx, client, params.ThresholdPriorityClassName)
if err != nil {
return
}
}
if priority > SystemCriticalPriority {
return 0, fmt.Errorf("Priority threshold can't be greater than %d", SystemCriticalPriority)
}
return
}

View File

@@ -20,12 +20,14 @@ import (
"context"
"math"
"os"
"sigs.k8s.io/descheduler/pkg/utils"
"sort"
"strings"
"testing"
"time"
v1 "k8s.io/api/core/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
@@ -47,7 +49,7 @@ import (
"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
)
func MakePodSpec() v1.PodSpec {
func MakePodSpec(priorityClassName string) v1.PodSpec {
return v1.PodSpec{
Containers: []v1.Container{{
Name: "pause",
@@ -65,11 +67,12 @@ func MakePodSpec() v1.PodSpec {
},
},
}},
PriorityClassName: priorityClassName,
}
}
// RcByNameContainer returns a ReplicationControoler with specified name and container
func RcByNameContainer(name, namespace string, replicas int32, labels map[string]string, gracePeriod *int64) *v1.ReplicationController {
func RcByNameContainer(name, namespace string, replicas int32, labels map[string]string, gracePeriod *int64, priorityClassName string) *v1.ReplicationController {
zeroGracePeriod := int64(0)
// Add "name": name to the labels, overwriting if it exists.
@@ -95,7 +98,7 @@ func RcByNameContainer(name, namespace string, replicas int32, labels map[string
ObjectMeta: metav1.ObjectMeta{
Labels: labels,
},
Spec: MakePodSpec(),
Spec: MakePodSpec(priorityClassName),
},
},
}
@@ -176,7 +179,7 @@ func TestLowNodeUtilization(t *testing.T) {
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
rc := RcByNameContainer("test-rc-node-utilization", testNamespace.Name, int32(15), map[string]string{"test": "node-utilization"}, nil)
rc := RcByNameContainer("test-rc-node-utilization", testNamespace.Name, int32(15), map[string]string{"test": "node-utilization"}, nil, "")
if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil {
t.Errorf("Error creating deployment %v", err)
}
@@ -185,7 +188,7 @@ func TestLowNodeUtilization(t *testing.T) {
deleteRC(ctx, t, clientSet, rc)
}
func runPodLifetimeStrategy(ctx context.Context, clientset clientset.Interface, nodeInformer coreinformers.NodeInformer, namespaces deschedulerapi.Namespaces) {
func runPodLifetimeStrategy(ctx context.Context, clientset clientset.Interface, nodeInformer coreinformers.NodeInformer, namespaces deschedulerapi.Namespaces, priorityClass string, priority *int32) {
// Run descheduler.
evictionPolicyGroupVersion, err := eutils.SupportEviction(clientset)
if err != nil || len(evictionPolicyGroupVersion) == 0 {
@@ -204,8 +207,10 @@ func runPodLifetimeStrategy(ctx context.Context, clientset clientset.Interface,
deschedulerapi.DeschedulerStrategy{
Enabled: true,
Params: &deschedulerapi.StrategyParameters{
MaxPodLifeTimeSeconds: &maxPodLifeTimeSeconds,
Namespaces: namespaces,
MaxPodLifeTimeSeconds: &maxPodLifeTimeSeconds,
Namespaces: namespaces,
ThresholdPriority: priority,
ThresholdPriorityClassName: priorityClass,
},
},
nodes,
@@ -257,7 +262,7 @@ func TestNamespaceConstraintsInclude(t *testing.T) {
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
rc := RcByNameContainer("test-rc-podlifetime", testNamespace.Name, 5, map[string]string{"test": "podlifetime-include"}, nil)
rc := RcByNameContainer("test-rc-podlifetime", testNamespace.Name, 5, map[string]string{"test": "podlifetime-include"}, nil, "")
if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil {
t.Errorf("Error creating deployment %v", err)
}
@@ -283,7 +288,7 @@ func TestNamespaceConstraintsInclude(t *testing.T) {
t.Logf("set the strategy to delete pods from %v namespace", rc.Namespace)
runPodLifetimeStrategy(ctx, clientSet, nodeInformer, deschedulerapi.Namespaces{
Include: []string{rc.Namespace},
})
}, "", nil)
// All pods are supposed to be deleted, wait until all the old pods are deleted
if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) {
@@ -328,7 +333,7 @@ func TestNamespaceConstraintsExclude(t *testing.T) {
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
rc := RcByNameContainer("test-rc-podlifetime", testNamespace.Name, 5, map[string]string{"test": "podlifetime-exclude"}, nil)
rc := RcByNameContainer("test-rc-podlifetime", testNamespace.Name, 5, map[string]string{"test": "podlifetime-exclude"}, nil, "")
if _, err := clientSet.CoreV1().ReplicationControllers(rc.Namespace).Create(ctx, rc, metav1.CreateOptions{}); err != nil {
t.Errorf("Error creating deployment %v", err)
}
@@ -354,7 +359,7 @@ func TestNamespaceConstraintsExclude(t *testing.T) {
t.Logf("set the strategy to delete pods from namespaces except the %v namespace", rc.Namespace)
runPodLifetimeStrategy(ctx, clientSet, nodeInformer, deschedulerapi.Namespaces{
Exclude: []string{rc.Namespace},
})
}, "", nil)
t.Logf("Waiting 10s")
time.Sleep(10 * time.Second)
@@ -373,6 +378,145 @@ func TestNamespaceConstraintsExclude(t *testing.T) {
}
}
func TestThresholdPriority(t *testing.T) {
testPriority(t, false)
}
func TestThresholdPriorityClass(t *testing.T) {
testPriority(t, true)
}
func testPriority(t *testing.T, isPriorityClass bool) {
var highPriority = int32(1000)
var lowPriority = int32(500)
ctx := context.Background()
clientSet, nodeInformer, stopCh := initializeClient(t)
defer close(stopCh)
testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}}
if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil {
t.Fatalf("Unable to create ns %v", testNamespace.Name)
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
// create two priority classes
highPriorityClass := &schedulingv1.PriorityClass{
ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name()) + "-highpriority"},
Value: highPriority,
}
if _, err := clientSet.SchedulingV1().PriorityClasses().Create(ctx, highPriorityClass, metav1.CreateOptions{}); err != nil {
t.Fatalf("Error creating priorityclass %s: %v", highPriorityClass.Name, err)
}
defer clientSet.SchedulingV1().PriorityClasses().Delete(ctx, highPriorityClass.Name, metav1.DeleteOptions{})
lowPriorityClass := &schedulingv1.PriorityClass{
ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name()) + "-lowpriority"},
Value: lowPriority,
}
if _, err := clientSet.SchedulingV1().PriorityClasses().Create(ctx, lowPriorityClass, metav1.CreateOptions{}); err != nil {
t.Fatalf("Error creating priorityclass %s: %v", lowPriorityClass.Name, err)
}
defer clientSet.SchedulingV1().PriorityClasses().Delete(ctx, lowPriorityClass.Name, metav1.DeleteOptions{})
// create two RCs with different priority classes in the same namespace
rcHighPriority := RcByNameContainer("test-rc-podlifetime-highpriority", testNamespace.Name, 5,
map[string]string{"test": "podlifetime-highpriority"}, nil, highPriorityClass.Name)
if _, err := clientSet.CoreV1().ReplicationControllers(rcHighPriority.Namespace).Create(ctx, rcHighPriority, metav1.CreateOptions{}); err != nil {
t.Errorf("Error creating rc %s: %v", rcHighPriority.Name, err)
}
defer deleteRC(ctx, t, clientSet, rcHighPriority)
rcLowPriority := RcByNameContainer("test-rc-podlifetime-lowpriority", testNamespace.Name, 5,
map[string]string{"test": "podlifetime-lowpriority"}, nil, lowPriorityClass.Name)
if _, err := clientSet.CoreV1().ReplicationControllers(rcLowPriority.Namespace).Create(ctx, rcLowPriority, metav1.CreateOptions{}); err != nil {
t.Errorf("Error creating rc %s: %v", rcLowPriority.Name, err)
}
defer deleteRC(ctx, t, clientSet, rcLowPriority)
// wait for a while so all the pods are at least few seconds older
time.Sleep(5 * time.Second)
// it's assumed all new pods are named differently from currently running -> no name collision
podListHighPriority, err := clientSet.CoreV1().Pods(rcHighPriority.Namespace).List(
ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcHighPriority.Spec.Template.Labels).String()})
if err != nil {
t.Fatalf("Unable to list pods: %v", err)
}
podListLowPriority, err := clientSet.CoreV1().Pods(rcLowPriority.Namespace).List(
ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcLowPriority.Spec.Template.Labels).String()})
if err != nil {
t.Fatalf("Unable to list pods: %v", err)
}
if len(podListHighPriority.Items)+len(podListLowPriority.Items) != 10 {
t.Fatalf("Expected 10 replicas, got %v instead", len(podListHighPriority.Items)+len(podListLowPriority.Items))
}
expectReservePodNames := getPodNames(podListHighPriority.Items)
expectEvictPodNames := getPodNames(podListLowPriority.Items)
sort.Strings(expectReservePodNames)
sort.Strings(expectEvictPodNames)
t.Logf("Pods not expect to be evicted: %v, pods expect to be evicted: %v", expectReservePodNames, expectEvictPodNames)
if isPriorityClass {
t.Logf("set the strategy to delete pods with priority lower than priority class %s", highPriorityClass.Name)
runPodLifetimeStrategy(ctx, clientSet, nodeInformer, deschedulerapi.Namespaces{}, highPriorityClass.Name, nil)
} else {
t.Logf("set the strategy to delete pods with priority lower than %d", highPriority)
runPodLifetimeStrategy(ctx, clientSet, nodeInformer, deschedulerapi.Namespaces{}, "", &highPriority)
}
t.Logf("Waiting 10s")
time.Sleep(10 * time.Second)
// check if all pods with high priority class are not evicted
podListHighPriority, err = clientSet.CoreV1().Pods(rcHighPriority.Namespace).List(
ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcHighPriority.Spec.Template.Labels).String()})
if err != nil {
t.Fatalf("Unable to list pods after running strategy: %v", err)
}
excludePodNames := getPodNames(podListHighPriority.Items)
sort.Strings(excludePodNames)
t.Logf("Existing high priority pods: %v", excludePodNames)
// validate no pods were deleted
if len(intersectStrings(expectReservePodNames, excludePodNames)) != 5 {
t.Fatalf("None of %v high priority pods are expected to be deleted", expectReservePodNames)
}
//check if all pods with low priority class are evicted
if err := wait.PollImmediate(time.Second, 20*time.Second, func() (bool, error) {
podListLowPriority, err := clientSet.CoreV1().Pods(rcLowPriority.Namespace).List(
ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(rcLowPriority.Spec.Template.Labels).String()})
if err != nil {
return false, nil
}
includePodNames := getPodNames(podListLowPriority.Items)
// validate all pod were deleted
if len(intersectStrings(expectEvictPodNames, includePodNames)) > 0 {
t.Logf("Waiting until %v low priority pods get deleted", intersectStrings(expectEvictPodNames, includePodNames))
// check if there's at least one pod not in Terminating state
for _, pod := range podListLowPriority.Items {
// In case podList contains newly created pods
if len(intersectStrings(expectEvictPodNames, []string{pod.Name})) == 0 {
continue
}
if pod.DeletionTimestamp == nil {
t.Logf("Pod %v not in terminating state", pod.Name)
return false, nil
}
}
t.Logf("All %v pods are terminating", intersectStrings(expectEvictPodNames, includePodNames))
}
return true, nil
}); err != nil {
t.Fatalf("Error waiting for pods to be deleted: %v", err)
}
}
func TestEvictAnnotation(t *testing.T) {
ctx := context.Background()
@@ -396,7 +540,7 @@ func TestEvictAnnotation(t *testing.T) {
}
defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{})
rc := RcByNameContainer("test-rc-evict-annotation", testNamespace.Name, int32(15), map[string]string{"test": "annotation"}, nil)
rc := RcByNameContainer("test-rc-evict-annotation", testNamespace.Name, int32(15), map[string]string{"test": "annotation"}, nil, "")
rc.Spec.Template.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
rc.Spec.Template.Spec.Volumes = []v1.Volume{
{
@@ -509,7 +653,10 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface,
continue
}
// List all the pods on the current Node
podsOnANode, err := podutil.ListPodsOnANode(ctx, clientSet, node, podutil.WithFilter(podEvictor.IsEvictable))
podsOnANode, err := podutil.ListPodsOnANode(ctx, clientSet, node,
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority)
}))
if err != nil {
t.Errorf("Error listing pods on a node %v", err)
}
@@ -521,7 +668,10 @@ func evictPods(ctx context.Context, t *testing.T, clientSet clientset.Interface,
}
t.Log("Eviction of pods starting")
startEndToEndForLowNodeUtilization(ctx, clientSet, nodeInformer, podEvictor)
podsOnleastUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, leastLoadedNode, podutil.WithFilter(podEvictor.IsEvictable))
podsOnleastUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, leastLoadedNode,
podutil.WithFilter(func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod, utils.SystemCriticalPriority)
}))
if err != nil {
t.Errorf("Error listing pods on a node %v", err)
}