From 398ffa7ee03cd18b546857a612b440860acf3f40 Mon Sep 17 00:00:00 2001 From: Lucas Severo Alves Date: Wed, 28 Sep 2022 16:02:25 +0200 Subject: [PATCH] add namespace filter to nodeutilization --- README.md | 3 + .../nodeutilization/highnodeutilization.go | 1 + .../nodeutilization/lownodeutilization.go | 1 + .../lownodeutilization_test.go | 126 ++++++++++++++++++ .../nodeutilization/nodeutilization.go | 21 ++- .../plugins/nodeutilization/types.go | 8 ++ .../plugins/nodeutilization/validation.go | 11 +- .../nodeutilization/zz_generated.deepcopy.go | 10 ++ 8 files changed, 178 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0bc94b0cb..ac2b2c656 100644 --- a/README.md +++ b/README.md @@ -243,6 +243,7 @@ actual usage metrics. Implementing metrics-based descheduling is currently TODO |`thresholdPriority`|int (see [priority filtering](#priority-filtering))| |`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))| |`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))| +|`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))| **Example:** @@ -315,6 +316,7 @@ actual usage metrics. Implementing metrics-based descheduling is currently TODO |`thresholdPriority`|int (see [priority filtering](#priority-filtering))| |`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))| |`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))| +|`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))| **Example:** @@ -613,6 +615,7 @@ The following strategies accept a `namespaces` parameter which allows to specify * `RemoveDuplicates` * `RemovePodsViolatingTopologySpreadConstraint` * `RemoveFailedPods` +* `LowNodeUtilization` and `HighNodeUtilization` (Only filtered right before eviction, parameter named `evictableNamespaces`) For example: diff --git a/pkg/framework/plugins/nodeutilization/highnodeutilization.go b/pkg/framework/plugins/nodeutilization/highnodeutilization.go index 0c3977153..fa6b83146 100644 --- a/pkg/framework/plugins/nodeutilization/highnodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/highnodeutilization.go @@ -140,6 +140,7 @@ func (h *HighNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fr evictPodsFromSourceNodes( ctx, + h.args.EvictableNamespaces, sourceNodes, highNodes, h.handle.Evictor(), diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go index b11ff4dce..b750b1d7a 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go @@ -187,6 +187,7 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra evictPodsFromSourceNodes( ctx, + l.args.EvictableNamespaces, sourceNodes, lowNodes, l.handle.Evictor(), diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go index 8cf7be5d8..a00b83340 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization_test.go @@ -58,6 +58,7 @@ func TestLowNodeUtilization(t *testing.T) { pods []*v1.Pod expectedPodsEvicted uint evictedPods []string + evictableNamespaces *api.Namespaces }{ { name: "no evictable pods", @@ -155,6 +156,130 @@ func TestLowNodeUtilization(t *testing.T) { }, expectedPodsEvicted: 4, }, + { + name: "without priorities, but excluding namespaces", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 30, + v1.ResourcePods: 30, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 50, + v1.ResourcePods: 50, + }, + nodes: []*v1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace1" + }), + test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace1" + }), + test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace1" + }), + test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace1" + }), + test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace1" + }), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) { + // A Critical Pod. + pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + evictableNamespaces: &api.Namespaces{ + Exclude: []string{ + "namespace1", + }, + }, + expectedPodsEvicted: 0, + }, + { + name: "without priorities, but include only default namespace", + thresholds: api.ResourceThresholds{ + v1.ResourceCPU: 30, + v1.ResourcePods: 30, + }, + targetThresholds: api.ResourceThresholds{ + v1.ResourceCPU: 50, + v1.ResourcePods: 50, + }, + nodes: []*v1.Node{ + test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil), + test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil), + test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable), + }, + pods: []*v1.Pod{ + test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef), + test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace3" + }), + test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace4" + }), + test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) { + pod.Namespace = "namespace5" + }), + // These won't be evicted. + test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef), + test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) { + // A pod with local storage. + test.SetNormalOwnerRef(pod) + pod.Spec.Volumes = []v1.Volume{ + { + Name: "sample", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "somePath"}, + EmptyDir: &v1.EmptyDirVolumeSource{ + SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)}, + }, + }, + } + // A Mirror Pod. + pod.Annotations = test.GetMirrorPodAnnotation() + }), + test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) { + // A Critical Pod. + pod.Namespace = "kube-system" + priority := utils.SystemCriticalPriority + pod.Spec.Priority = &priority + }), + test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef), + }, + evictableNamespaces: &api.Namespaces{ + Include: []string{ + "default", + }, + }, + expectedPodsEvicted: 2, + }, { name: "without priorities stop when cpu capacity is depleted", thresholds: api.ResourceThresholds{ @@ -794,6 +919,7 @@ func TestLowNodeUtilization(t *testing.T) { Thresholds: test.thresholds, TargetThresholds: test.targetThresholds, UseDeviationThresholds: test.useDeviationThresholds, + EvictableNamespaces: test.evictableNamespaces, }, handle) if err != nil { diff --git a/pkg/framework/plugins/nodeutilization/nodeutilization.go b/pkg/framework/plugins/nodeutilization/nodeutilization.go index 2b7ea4a2f..fe96068f9 100644 --- a/pkg/framework/plugins/nodeutilization/nodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/nodeutilization.go @@ -24,6 +24,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" "sigs.k8s.io/descheduler/pkg/descheduler/node" @@ -212,6 +213,7 @@ func classifyNodes( // TODO: @ravig Break this function into smaller functions. func evictPodsFromSourceNodes( ctx context.Context, + evictableNamespaces *api.Namespaces, sourceNodes, destinationNodes []NodeInfo, podEvictor framework.Evictor, podFilter func(pod *v1.Pod) bool, @@ -265,13 +267,14 @@ func evictPodsFromSourceNodes( klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers") // sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers. podutil.SortPodsBasedOnPriorityLowToHigh(removablePods) - evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, continueEviction) + evictPods(ctx, evictableNamespaces, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, continueEviction) } } func evictPods( ctx context.Context, + evictableNamespaces *api.Namespaces, inputPods []*v1.Pod, nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity, @@ -280,6 +283,11 @@ func evictPods( continueEviction continueEvictionCond, ) { + var excludedNamespaces sets.String + if evictableNamespaces != nil { + excludedNamespaces = sets.NewString(evictableNamespaces.Exclude...) + } + if continueEviction(nodeInfo, totalAvailableUsage) { for _, pod := range inputPods { if !utils.PodToleratesTaints(pod, taintsOfLowNodes) { @@ -287,7 +295,16 @@ func evictPods( continue } - if podEvictor.PreEvictionFilter(pod) { + preEvictionFilterWithOptions, err := podutil.NewOptions(). + WithFilter(podEvictor.PreEvictionFilter). + WithoutNamespaces(excludedNamespaces). + BuildFilterFunc() + if err != nil { + klog.V(1).ErrorS(err, "could not build preEvictionFilter with namespace exclusion") + continue + } + + if preEvictionFilterWithOptions(pod) { if podEvictor.Evict(ctx, pod, evictions.EvictOptions{}) { klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod)) diff --git a/pkg/framework/plugins/nodeutilization/types.go b/pkg/framework/plugins/nodeutilization/types.go index f850f6470..8bdd694c1 100644 --- a/pkg/framework/plugins/nodeutilization/types.go +++ b/pkg/framework/plugins/nodeutilization/types.go @@ -28,6 +28,10 @@ type LowNodeUtilizationArgs struct { Thresholds api.ResourceThresholds TargetThresholds api.ResourceThresholds NumberOfNodes int + // Naming this one differently since namespaces are still + // considered while considering resoures used by pods + // but then filtered out before eviction + EvictableNamespaces *api.Namespaces } // +k8s:deepcopy-gen=true @@ -38,4 +42,8 @@ type HighNodeUtilizationArgs struct { Thresholds api.ResourceThresholds NumberOfNodes int + // Naming this one differently since namespaces are still + // considered while considering resoures used by pods + // but then filtered out before eviction + EvictableNamespaces *api.Namespaces } diff --git a/pkg/framework/plugins/nodeutilization/validation.go b/pkg/framework/plugins/nodeutilization/validation.go index 89969a8d9..1dd273627 100644 --- a/pkg/framework/plugins/nodeutilization/validation.go +++ b/pkg/framework/plugins/nodeutilization/validation.go @@ -19,7 +19,16 @@ import ( ) func ValidateHighNodeUtilizationArgs(args *HighNodeUtilizationArgs) error { - return validateThresholds(args.Thresholds) + // only exclude can be set, or not at all + if args.EvictableNamespaces != nil && len(args.EvictableNamespaces.Include) > 0 { + return fmt.Errorf("only Exclude namespaces can be set, inclusion is not supported") + } + err := validateThresholds(args.Thresholds) + if err != nil { + return err + } + + return nil } func ValidateLowNodeUtilizationArgs(args *LowNodeUtilizationArgs) error { diff --git a/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go b/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go index faab6a47c..efeab1816 100644 --- a/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go +++ b/pkg/framework/plugins/nodeutilization/zz_generated.deepcopy.go @@ -37,6 +37,11 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) { (*out)[key] = val } } + if in.EvictableNamespaces != nil { + in, out := &in.EvictableNamespaces, &out.EvictableNamespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } return } @@ -76,6 +81,11 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) { (*out)[key] = val } } + if in.EvictableNamespaces != nil { + in, out := &in.EvictableNamespaces, &out.EvictableNamespaces + *out = new(api.Namespaces) + (*in).DeepCopyInto(*out) + } return }